Skip to content

Commit c94bf23

Browse files
authored
Merge pull request #183 from MITLibraries/libkey
Add LibKey API option for DOI and PMID metadata
2 parents a8d97e1 + f7b4cbb commit c94bf23

15 files changed

+537
-6
lines changed

.env.test

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
DETECTOR_VERSION=1
22
LINKRESOLVER_BASEURL=https://mit.primo.exlibrisgroup.com/discovery/openurl?institution=01MIT_INST&rfr_id=info:sid/mit.tacos.api&vid=01MIT_INST:MIT
33
TACOS_EMAIL=[email protected]
4+
LIBKEY_KEY=FAKE_LIBKEY_KEY
5+
LIBKEY_ID=FAKE_LIBKEY_ID

README.md

+5
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ changes, this is the signal which indicates that terms need to be re-evaluated.
6969

7070
### Optional
7171

72+
`LIBKEY_KEY`: LibKey API key. Required if either `LIBKEY_DOI` or `LIBKEY_PMID` is set.
73+
`LIBKEY_ID`: LibKey Library ID. Required if either `LIBKEY_DOI` or `LIBKEY_PMID` is set.
74+
`LIBKEY_DOI`: If set, use LibKey for DOI metadata lookups. If not set, Unpaywall is used.
75+
`LIBKEY_PMID`: If set, use LibKey for PMID metadata lookups. If not set, NCBI Entrez is used.
76+
7277
`PLATFORM_NAME`: The value set is added to the header after the MIT Libraries logo. The logic and CSS for this comes
7378
from our theme gem.
7479

app/graphql/types/details_type.rb

+4-1
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@ class DetailsType < Types::BaseObject
77
field :date, String
88
field :doi, String
99
field :issns, [String]
10+
field :journal_image, String
11+
field :journal_link, String
1012
field :journal_name, String
1113
field :link_resolver_url, String
1214
field :oa, Boolean
1315
field :oa_status, String
16+
field :pmid, String
1417
field :publisher, String
1518
field :title, String
1619

@@ -19,7 +22,7 @@ def issns
1922
end
2023

2124
def authors
22-
@object[:authors]&.split(',')
25+
@object[:authors]&.split(';')
2326
end
2427
end
2528
end

app/graphql/types/standard_identifiers_type.rb

+19-1
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,30 @@ def details
1515
when :barcode
1616
LookupBarcode.new.info(@object[:value])
1717
when :doi
18-
LookupDoi.new.info(@object[:value])
18+
doi
1919
when :isbn
2020
LookupIsbn.new.info(@object[:value])
2121
when :issn
2222
LookupIssn.new.info(@object[:value])
2323
when :pmid
24+
pmid
25+
end
26+
end
27+
28+
# doi picks the DOI lookup backend.
#
# LookupLibkey is used when the LIBKEY_DOI environment variable is set (to any value); otherwise the
# lookup is delegated to LookupDoi.
def doi
  return LookupLibkey.info(doi: @object[:value]) if ENV.fetch('LIBKEY_DOI', false)

  LookupDoi.new.info(@object[:value])
end
36+
37+
# pmid picks the PMID lookup backend.
#
# LookupLibkey is used when the LIBKEY_PMID environment variable is set (to any value); otherwise the
# lookup is delegated to LookupPmid.
def pmid
  return LookupLibkey.info(pmid: @object[:value]) if ENV.fetch('LIBKEY_PMID', false)

  # LookupPmid is handed only the last whitespace-separated token of the detected value.
  LookupPmid.new.info(@object[:value].split.last)
end

app/models/detector/standard_identifiers.rb

+7
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def initialize(phrase)
2626
pattern_checker(phrase)
2727
strip_invalid_issns
2828
strip_invalid_isbns
29+
strip_pmid_prefix
2930
end
3031

3132
# The record method will consult the set of regex-based detectors that are defined in
@@ -63,6 +64,12 @@ def patterns
6364
}
6465
end
6566

67+
# strip_pmid_prefix drops the "PMID:" / "pmid:" label from a detected PMID and trims surrounding whitespace.
# The detection regex relies on the label, but the stored identifier value should be the bare number.
# Nothing happens when no PMID was detected.
def strip_pmid_prefix
  return unless @detections[:pmid].present?

  bare_value = @detections[:pmid].gsub(/pmid:|PMID:/, '')
  @detections[:pmid] = bare_value.strip
end
72+
6673
# strip_invalid_isbns coordinates the logic to remove ISBNs that are not valid from our list of detected ISBNs
6774
#
6875
# ISBNs cannot be validated via regex. Regex gives us a list of candidates that look like ISBNs. We remove invalid

app/models/lookup_barcode.rb

+9-1
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,18 @@ def extract_metadata(xml)
3737
title: xml.xpath('//dc:title', 'dc' => 'http://purl.org/dc/elements/1.1/').text,
3838
date: xml.xpath('//dc:date', 'dc' => 'http://purl.org/dc/elements/1.1/').text,
3939
publisher: xml.xpath('//dc:publisher', 'dc' => 'http://purl.org/dc/elements/1.1/').text,
40-
authors: xml.xpath('//dc:contributor', 'dc' => 'http://purl.org/dc/elements/1.1/').text
40+
authors: authors(xml)
4141
}
4242
end
4343

44+
# authors gathers the text of every dc:contributor element in the record and joins the names with ';'.
#
# @param xml [#xpath] parsed record; xpath must return an enumerable of nodes responding to #text
# @return [String] contributor names separated by ';' (empty string when there are no contributors)
def authors(xml)
  xml.xpath('//dc:contributor', 'dc' => 'http://purl.org/dc/elements/1.1/').map(&:text).join(';')
end
51+
4452
def url(barcode)
4553
"https://mit.alma.exlibrisgroup.com/view/sru/01MIT_INST?version=1.2&operation=searchRetrieve&recordSchema=dc&query=alma.all_for_ui=#{barcode}"
4654
end

app/models/lookup_isbn.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def fetch_authors(isbn_json)
3232
json = parse_response(url)
3333
json['name']
3434
end
35-
author_names.join(' ; ')
35+
author_names.join(';')
3636
end
3737

3838
def parse_response(url)

app/models/lookup_libkey.rb

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# frozen_string_literal: true
2+
3+
# LookupLibkey can take a DOI or PMID and return metadata, link resolver links, and journal browse links.
#
# Configuration is read from the LIBKEY_ID and LIBKEY_KEY environment variables on every call.
class LookupLibkey
  BASEURL = 'https://public-api.thirdiron.com/public/v1/libraries'

  # Info is the main entry point into the LookupLibkey Class.
  #
  # When both identifiers are supplied, the DOI is used and the PMID is ignored.
  #
  # @param doi [String]
  # @param pmid [String]
  # @return [Hash, nil] metadata hash; nil when the environment is not configured, no identifier was
  #   provided, or the lookup failed
  def self.info(doi: nil, pmid: nil)
    return unless expected_env?

    unless doi.present? || pmid.present?
      Rails.logger.error('No doi or pmid provided to LookupLibkey')
      return
    end

    external_url = construct_url(doi:, pmid:)
    # The URL embeds the access token, so it is never logged verbatim.
    Rails.logger.debug { redact_token(external_url) }

    external_data = fetch(external_url)
    return if external_data == 'Error'

    extract_metadata(external_data)
  end

  # expected_env? confirms both required variables are set (an empty value counts as unset)
  #
  # @return Boolean
  def self.expected_env?
    Rails.logger.error('No LIBKEY_KEY set') unless libkey_key.present?
    Rails.logger.error('No LIBKEY_ID set') unless libkey_id.present?

    libkey_id.present? && libkey_key.present?
  end

  # using method instead of constant to allow for mutating in testing without causing sporadic failures
  def self.libkey_key
    ENV.fetch('LIBKEY_KEY', nil)
  end

  # using method instead of constant to allow for mutating in testing without causing sporadic failures
  def self.libkey_id
    ENV.fetch('LIBKEY_ID', nil)
  end

  # extract_metadata maps data from the LibKey response to an internal hash
  #
  # Missing pieces of the response (no included journal, no authors, no best link) yield nil values
  # instead of raising.
  #
  # @param external_data [Hash] parsed JSON response with 'data' and 'included' keys
  # @return Hash
  def self.extract_metadata(external_data)
    article = external_data['data'] || {}
    journal = external_data['included']&.first || {}

    {
      title: article['title'],
      # Normalise '; ' to ';' so the author string uses a bare ';' separator.
      authors: article['authors']&.gsub('; ', ';'),
      doi: article['doi'],
      pmid: article['pmid'],
      oa: article['openAccess'],
      date: article['date'],
      journal_name: journal['title'],
      journal_issns: journal['issn'],
      journal_image: journal['coverImageUrl'],
      journal_link: journal['browzineWebLink'],
      link_resolver_url: article.dig('bestIntegratorLink', 'bestLink')
    }
  end

  # https://thirdiron.atlassian.net/wiki/spaces/BrowZineAPIDocs/pages/65929220/BrowZine+Public+API+Overview
  # https://thirdiron.atlassian.net/wiki/spaces/BrowZineAPIDocs/pages/65699928/Article+DOI+PMID+Lookup+Endpoint+LibKey
  # public/v1/libraries/:library_id/articles/doi/:article_doi?access_token=ffffffff-ffff-ffff-ffff-ffffffffffff
  # /public/v1/libraries/:library_id/articles/pmid/:article_pmid?access_token=ffffffff-ffff-ffff-ffff-ffffffffffff
  #
  # @param doi [String] takes precedence over pmid when both are given
  # @param pmid [String]
  # @return [String, nil] request URL, or nil when neither identifier is present
  def self.construct_url(doi: nil, pmid: nil)
    if doi.present?
      "#{BASEURL}/#{libkey_id}/articles/doi/#{doi}?include=journal&access_token=#{libkey_key}"
    elsif pmid.present?
      "#{BASEURL}/#{libkey_id}/articles/pmid/#{pmid}?include=journal&access_token=#{libkey_key}"
    else
      Rails.logger.error('No PMID or DOI provided to LookupLibkey.construct_url')
      nil
    end
  end

  # Fetch performs the HTTP calls, parses JSON for successful requests.
  #
  # @param url [String]
  # @return [Hash, String] parsed JSON on HTTP 200; the string 'Error' for any other status, a transport
  #   failure, or a body that is not valid JSON
  def self.fetch(url)
    resp = HTTP.headers(accept: 'application/json').get(url)
    return JSON.parse(resp.to_s) if resp.status == 200

    Rails.logger.debug do
      'Fact lookup error. DOI or PMID detected but LibKey returned no data or otherwise errored'
    end
    Rails.logger.debug { "Response status: #{resp.status}" }
    Rails.logger.debug { "URL: #{redact_token(url)}" }
    'Error'
  rescue HTTP::Error, JSON::ParserError => e
    # An unreachable service or malformed body is reported the same way as a non-200 response.
    Rails.logger.error("LibKey lookup failed: #{e.class}: #{redact_token(e.message)}")
    'Error'
  end

  # redact_token masks the access_token query parameter so URLs can be logged without leaking the key.
  #
  # @param text [String, nil]
  # @return [String]
  def self.redact_token(text)
    text.to_s.gsub(/access_token=[^&\s]*/, 'access_token=[FILTERED]')
  end
end

test/models/detector/standard_identifiers_test.rb

+2-2
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ class StandardIdentifiersTest < ActiveSupport::TestCase
195195
test 'pmid detected in string' do
196196
actual = Detector::StandardIdentifiers.new('Citation and stuff PMID: 35648703 more stuff.').detections
197197

198-
assert_equal('PMID: 35648703', actual[:pmid])
198+
assert_equal('35648703', actual[:pmid])
199199
end
200200

201201
test 'pmid examples' do
@@ -204,7 +204,7 @@ class StandardIdentifiersTest < ActiveSupport::TestCase
204204
samples.each do |pmid|
205205
actual = Detector::StandardIdentifiers.new(pmid).detections
206206

207-
assert_equal(pmid, actual[:pmid])
207+
assert_equal(pmid.gsub(/PMID:|pmid:/, '').strip, actual[:pmid])
208208
end
209209
end
210210

test/models/lookup_libkey_test.rb

+120
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# frozen_string_literal: true
2+
3+
require 'test_helper'
4+
5+
# Tests for LookupLibkey. Lookups that reach the LibKey API are replayed from VCR cassettes.
class LookupLibkeyTest < ActiveSupport::TestCase
  # Keys every successful LookupLibkey.info result is expected to expose.
  EXPECTED_KEYS = %i[title authors doi pmid oa date journal_name journal_issns journal_image journal_link
                     link_resolver_url].freeze

  test 'metadata object is returned with expected fields for dois' do
    VCR.use_cassette('libkey doi 10.1038/d41586-023-03497-2') do
      metadata = LookupLibkey.info(doi: '10.1038/d41586-023-03497-2')

      EXPECTED_KEYS.each do |key|
        assert_includes(metadata.keys, key)
      end
    end
  end

  test 'metadata object is returned with expected fields for pmids' do
    VCR.use_cassette('libkey pmid 10490598') do
      metadata = LookupLibkey.info(pmid: '10490598')

      EXPECTED_KEYS.each do |key|
        assert_includes(metadata.keys, key)
      end
    end
  end

  test 'link resolver url returns expected value for dois' do
    VCR.use_cassette('libkey doi 10.1038/d41586-023-03497-2') do
      metadata = LookupLibkey.info(doi: '10.1038/d41586-023-03497-2')

      expected_url = 'https://libkey.io/libraries/FAKE_LIBKEY_ID/articles/594388926/full-text-file?utm_source=api_3735'

      assert_equal(expected_url, metadata[:link_resolver_url])
    end
  end

  test 'link resolver url returns expected value for pmids' do
    VCR.use_cassette('libkey pmid 10490598') do
      metadata = LookupLibkey.info(pmid: '10490598')

      expected_url = 'https://libkey.io/libraries/FAKE_LIBKEY_ID/articles/56753128/full-text-file?utm_source=api_3735'

      assert_equal(expected_url, metadata[:link_resolver_url])
    end
  end

  # info only accepts the doi: / pmid: keywords, so a positional argument is rejected outright.
  test 'doi or pmid are required' do
    error = assert_raises(ArgumentError) do
      LookupLibkey.info('no pmid or doi argument name given')
    end

    assert_equal('wrong number of arguments (given 1, expected 0)', error.message)
  end

  test 'LIBKEY_KEY is required' do
    ClimateControl.modify LIBKEY_KEY: nil do
      assert_nil(LookupLibkey.info(doi: '10.1038/d41586-023-03497-2'))
    end
  end

  test 'LIBKEY_ID is required' do
    ClimateControl.modify LIBKEY_ID: nil do
      assert_nil(LookupLibkey.info(doi: '10.1038/d41586-023-03497-2'))
    end
  end

  test 'blank doi and blank pmid returns nil' do
    assert_nil(LookupLibkey.info(doi: ''))
    assert_nil(LookupLibkey.info(pmid: ''))
    assert_nil(LookupLibkey.info(pmid: '', doi: ''))
  end

  test 'construct url for doi' do
    expected_url = 'https://public-api.thirdiron.com/public/v1/libraries/FAKE_LIBKEY_ID/articles/doi/my_doi?include=journal&access_token=FAKE_LIBKEY_KEY'
    actual_url = LookupLibkey.construct_url(doi: 'my_doi')

    assert_equal(expected_url, actual_url)
  end

  test 'construct url for pmid' do
    expected_url = 'https://public-api.thirdiron.com/public/v1/libraries/FAKE_LIBKEY_ID/articles/pmid/my_pmid?include=journal&access_token=FAKE_LIBKEY_KEY'
    actual_url = LookupLibkey.construct_url(pmid: 'my_pmid')

    assert_equal(expected_url, actual_url)
  end

  test 'construct url with no pmid or doi returns nil' do
    assert_nil(LookupLibkey.construct_url)
  end

  test 'invalid doi lookup returns nil' do
    VCR.use_cassette('libkey doi nonsense') do
      assert_nil(LookupLibkey.info(doi: 'nonsense'))
    end
  end

  test 'invalid pmid lookup returns nil' do
    VCR.use_cassette('libkey pmid nonsense') do
      assert_nil(LookupLibkey.info(pmid: 'nonsense'))
    end
  end
end

test/test_helper.rb

+9
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,15 @@
2323
ENV.fetch('TACOS_EMAIL', nil).to_s
2424
end
2525

26+
# Filter Libkey Key
27+
config.filter_sensitive_data('FAKE_LIBKEY_KEY') do
28+
ENV.fetch('LIBKEY_KEY', nil).to_s
29+
end
30+
# Filter LibKey ID
31+
config.filter_sensitive_data('FAKE_LIBKEY_ID') do
32+
ENV.fetch('LIBKEY_ID', nil).to_s
33+
end
34+
2635
config.before_record do |interaction|
2736
header = interaction.response&.headers&.[]('Report-To')
2837
header&.each do |redacted_text|

0 commit comments

Comments
 (0)