Skip to content

Commit 7f8bd83

Browse files
Merge pull request #169 from MITLibraries/tco-72
Adds barcode detector and lookup
2 parents 0c576f3 + ea880f1 commit 7f8bd83

22 files changed

+399
-22
lines changed

app/graphql/types/standard_identifiers_type.rb

+4-2
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,18 @@
22

33
module Types
44
class StandardIdentifiersType < Types::BaseObject
5-
description 'A detector for standard identifiers in search terms. Currently supported: ISBN, ISSN, PMID, DOI'
5+
description 'A detector for standard identifiers in search terms. Currently supported: Barcode, ISBN, ISSN, PMID, DOI'
66

77
field :details, DetailsType, description: 'Additional information about the detected identifier(s)'
8-
field :kind, String, null: false, description: 'The type of identifier detected (one of ISBN, ISSN, PMID, DOI)'
8+
field :kind, String, null: false, description: 'The type of identifier detected (one of Barcode, ISBN, ISSN, PMID, DOI)'
99
field :value, String, null: false, description: 'The identifier detected in the search term'
1010

1111
# details does external lookups and should only be run if the fields
1212
# have been explicitly requested
1313
def details
1414
case @object[:kind]
15+
when :barcode
16+
LookupBarcode.new.info(@object[:value])
1517
when :doi
1618
LookupDoi.new.info(@object[:value])
1719
when :isbn

app/models/detector/standard_identifiers.rb

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# frozen_string_literal: true
22

33
class Detector
4-
# Detector::StandardIdentifiers detects the identifiers DOI, ISBN, ISSN, PMID.
4+
# Detector::StandardIdentifiers detects the identifiers Barcode, DOI, ISBN, ISSN, PMID.
55
# See /docs/reference/pattern_detection_and_enhancement.md for details.
66
class StandardIdentifiers
77
attr_reader :detections
@@ -52,6 +52,7 @@ def self.record(term)
5252
# patterns are regex patterns to be applied to the basic search box input
5353
def patterns
5454
{
55+
barcode: /^39080[0-9]{9}$/,
5556
isbn: /\b(ISBN-*(1[03])* *(: ){0,1})*(([0-9Xx][- ]*){13}|([0-9Xx][- ]*){10})\b/,
5657
issn: /\b[0-9]{4}-[0-9]{3}[0-9xX]\b/,
5758
pmid: /\b((pmid|PMID):\s?(\d{7,8}))\b/,

app/models/lookup_barcode.rb

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# frozen_string_literal: true
2+
3+
require 'uri'

# LookupBarcode takes a 14-digit barcode (flagged by a regex within the Detector::StandardIdentifiers class) and asks
# the Alma SRU endpoint for the associated record. The structure of this class is close to the other lookup models,
# with an info method being the only public method. If a record is found for the submitted barcode, the class returns
# some Dublin Core metadata about the record, along with a link to the complete record in Primo using the
# discovery/fulldisplay path.
class LookupBarcode
  # Namespace bindings used when querying the SRU response with XPath.
  DC_NAMESPACE = { 'dc' => 'http://purl.org/dc/elements/1.1/' }.freeze
  SRW_NAMESPACE = { 'default' => 'http://www.loc.gov/zing/srw/' }.freeze

  # Base URLs for the SRU search and for the Primo full-display page.
  SRU_ENDPOINT = 'https://mit.alma.exlibrisgroup.com/view/sru/01MIT_INST'
  FULL_DISPLAY_URL = 'https://mit.primo.exlibrisgroup.com/discovery/fulldisplay'

  # info takes a barcode as an argument and returns associated metadata about that item, provided a record can be
  # located. If no record is found for any reason (error status, transport failure, empty result), it returns nil.
  #
  # @note While the barcode argument is technically a string, in reality it should be a 14-digit integer in order to
  #   return anything meaningful.
  # @param barcode [String] the barcode detected in the search term
  # @return [Hash, nil] keys :recordId, :title, :date, :publisher, :authors, :barcode and :link_resolver_url, or nil
  def info(barcode)
    xml = fetch(barcode)
    return if xml.nil?

    metadata = extract_metadata(xml)

    # A successful response with no populated fields means nothing matched the barcode.
    if metadata.values.all?(&:empty?)
      Rails.logger.debug { "Barcode lookup error. Barcode #{barcode} detected but Primo returned no data" }
      return
    end

    metadata.merge(barcode: barcode, link_resolver_url: link_resolver_url(metadata))
  end

  private

  # Pulls the fields of interest out of the parsed SRU response. Every value is a String (possibly empty).
  # NOTE(review): `.text` on a multi-node result presumably concatenates all matches without a separator, so several
  # dc:contributor elements would run together - confirm against a multi-author record.
  def extract_metadata(xml)
    {
      recordId: xml.xpath('//default:recordIdentifier', SRW_NAMESPACE).text,
      title: xml.xpath('//dc:title', DC_NAMESPACE).text,
      date: xml.xpath('//dc:date', DC_NAMESPACE).text,
      publisher: xml.xpath('//dc:publisher', DC_NAMESPACE).text,
      authors: xml.xpath('//dc:contributor', DC_NAMESPACE).text
    }
  end

  # Builds the SRU searchRetrieve URL. The barcode is form-encoded so that unexpected characters in the search term
  # cannot add or alter query parameters; purely numeric barcodes produce the same URL as plain interpolation.
  def url(barcode)
    "#{SRU_ENDPOINT}?version=1.2&operation=searchRetrieve&recordSchema=dc" \
      "&query=alma.all_for_ui=#{URI.encode_www_form_component(barcode)}"
  end

  # Performs the HTTP request and parses the body as XML.
  #
  # @return [Nokogiri::XML::Document, nil] the parsed response, or nil on a non-200 status or a transport failure
  def fetch(barcode)
    resp = HTTP.headers(accept: 'application/xml').get(url(barcode))
    return Nokogiri::XML(resp.to_s) if resp.status == 200

    Rails.logger.debug do
      "Barcode lookup error. Barcode #{barcode} detected but Primo returned an error status"
    end
    Rails.logger.debug { "URL: #{url(barcode)}" }
    Sentry.capture_message('Primo API error after barcode detection')
    nil
  rescue HTTP::Error => e
    # Connection and timeout failures are treated like any other failed lookup rather than failing the caller.
    Rails.logger.debug { "Barcode lookup error. Barcode #{barcode} detected but the request failed: #{e.message}" }
    Sentry.capture_exception(e)
    nil
  end

  # Builds the Primo full-display link from the record identifier found in the metadata.
  def link_resolver_url(metadata)
    "#{FULL_DISPLAY_URL}?vid=01MIT_INST:MIT&docid=alma#{metadata[:recordId]}"
  end
end

app/models/metrics/algorithms.rb

+4-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# suggested_resource_exact :integer
1818
# lcsh :integer
1919
# citation :integer
20+
# barcode :integer
2021
#
2122
module Metrics
2223
# Algorithms aggregates statistics for matches for all SearchEvents
@@ -49,8 +50,8 @@ def generate(month = nil)
4950
else
5051
count_matches(SearchEvent.includes(:term))
5152
end
52-
Metrics::Algorithms.create(month:, citation: matches[:citation], doi: matches[:doi], issn: matches[:issn],
53-
isbn: matches[:isbn], lcsh: matches[:lcsh], pmid: matches[:pmid],
53+
Metrics::Algorithms.create(month:, barcode: matches[:barcode], citation: matches[:citation], doi: matches[:doi],
54+
issn: matches[:issn], isbn: matches[:isbn], lcsh: matches[:lcsh], pmid: matches[:pmid],
5455
journal_exact: matches[:journal_exact],
5556
suggested_resource_exact: matches[:suggested_resource_exact],
5657
unmatched: matches[:unmatched])
@@ -122,7 +123,7 @@ def match_lcsh(event, matches)
122123
# @param matches [Hash] a Hash that keeps track of how many of each algorithm we match
123124
# @return [Array] an array of matched StandardIdentifiers
124125
def match_standard_identifiers(event, matches)
125-
known_ids = %i[unmatched pmid isbn issn doi]
126+
known_ids = %i[unmatched pmid isbn issn doi barcode]
126127
ids = Detector::StandardIdentifiers.new(event.term.phrase)
127128

128129
known_ids.each do |id|

app/views/report/algorithm_metrics.html.erb

+2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
<% else %>
1616
<th>Month</th>
1717
<% end %>
18+
<th>Barcode</th>
1819
<th>DOI</th>
1920
<th>ISSN</th>
2021
<th>ISBN</th>
@@ -31,6 +32,7 @@
3132
<% else %>
3233
<td><%= metric.month.strftime("%B %Y") %></td>
3334
<% end %>
35+
<td><%= metric.barcode %></td>
3436
<td><%= metric.doi %></td>
3537
<td><%= metric.issn %></td>
3638
<td><%= metric.isbn %></td>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Adds an integer `barcode` column to metrics_algorithms so barcode matches can be stored alongside the
# other per-algorithm counts.
class AddBarcodeToMetricsAlgorithms < ActiveRecord::Migration[7.1]
  def change
    change_table :metrics_algorithms do |t|
      t.integer :barcode
    end
  end
end
end

db/schema.rb

+2-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

db/seeds.rb

+6
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,14 @@
4141
Detector.find_or_create_by(name: 'Journal')
4242
Detector.find_or_create_by(name: 'SuggestedResource')
4343
Detector.find_or_create_by(name: 'Citation')
44+
Detector.find_or_create_by(name: 'Barcode')
4445

4546
# DetectorCategories
47+
DetectorCategory.find_or_create_by(
48+
detector: Detector.find_by(name: 'Barcode'),
49+
category: Category.find_by(name: 'Transactional'),
50+
confidence: 0.95
51+
)
4652
DetectorCategory.find_or_create_by(
4753
detector: Detector.find_by(name: 'Citation'),
4854
category: Category.find_by(name: 'Transactional'),

test/controllers/graphql_controller_test.rb

+24
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,30 @@ class GraphqlControllerTest < ActionDispatch::IntegrationTest
7272
assert_equal('10.1038/nphys1170', json['data']['logSearchEvent']['detectors']['standardIdentifiers'].first['value'])
7373
end
7474

75+
# Posts a logSearchEvent query for the term "39080027236626" while the 'barcode 39080027236626' VCR cassette
# is active, then checks the first standardIdentifiers entry: its kind is 'barcode', its value is the submitted
# term, and the explicitly requested details include the expected title.
test 'search event query can return detected barcodes' do
76+
VCR.use_cassette('barcode 39080027236626') do
77+
post '/graphql', params: { query: '{
78+
logSearchEvent(sourceSystem: "timdex", searchTerm: "39080027236626") {
79+
detectors {
80+
standardIdentifiers {
81+
kind
82+
value
83+
details {
84+
title
85+
}
86+
}
87+
}
88+
}
89+
}' }
90+
91+
json = response.parsed_body
92+
93+
assert_equal('barcode', json['data']['logSearchEvent']['detectors']['standardIdentifiers'].first['kind'])
94+
assert_equal('39080027236626', json['data']['logSearchEvent']['detectors']['standardIdentifiers'].first['value'])
95+
# details is requested in the query above, so this assertion depends on the barcode lookup result.
assert_equal('Transactions of the Institution of Naval Architects.', json['data']['logSearchEvent']['detectors']['standardIdentifiers'].first['details']['title'])
96+
end
97+
end
98+
7599
test 'search event query can return detected journals' do
76100
post '/graphql', params: { query: '{
77101
logSearchEvent(sourceSystem: "timdex", searchTerm: "nature") {

test/fixtures/detector_categories.yml

+5
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,8 @@ seven:
4343
detector: citation
4444
category: transactional
4545
confidence: 0.3
46+
47+
eight:
48+
detector: barcode
49+
category: transactional
50+
confidence: 0.95

test/fixtures/detectors.yml

+3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
# created_at :datetime not null
88
# updated_at :datetime not null
99
#
10+
barcode:
11+
name: 'Barcode'
12+
1013
citation:
1114
name: 'Citation'
1215

test/fixtures/fingerprints.yml

+6
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,9 @@ multiple_detections:
3939

4040
citation:
4141
value: '12 2 2005 2007 6 a accessed altun available context current dec education experience httpcieedasueduvolume6number12 hypertext in issues july language learners no of on online reading serial the understanding vol web'
42+
43+
barcode:
44+
value: 39080678901234
45+
46+
not_a_barcode:
47+
value: '39080678901234 extra some text with'

test/fixtures/search_events.yml

+7
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,10 @@ old_suggested_resource_jstor:
5555
term: suggested_resource_jstor
5656
source: test
5757
created_at: <%= 1.year.ago %>
58+
current_month_barcode:
59+
term: barcode
60+
source: test
61+
old_barcode:
62+
term: barcode
63+
source: test
64+
created_at: <%= 1.year.ago %>

test/fixtures/terms.yml

+8
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,11 @@ multiple_detections:
5757
citation:
5858
phrase: "A. Altun, &quot;Understanding hypertext in the context of reading on the web: Language learners' experience,&quot; Current Issues in Education, vol. 6, no. 12, July, 2005. [Online serial]. Available: http://cie.ed.asu.edu/volume6/number12/. [Accessed Dec. 2, 2007]."
5959
fingerprint: citation
60+
61+
barcode:
62+
phrase: '39080678901234'
63+
fingerprint: barcode
64+
65+
not_a_barcode:
66+
phrase: '39080678901234 with some extra text'
67+
fingerprint: not_a_barcode

test/models/detector/bulk_checker_test.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class CitationTest < ActiveSupport::TestCase
2525
test 'standard_identifier_bulk_checker' do
2626
bulk = Detector::StandardIdentifiers.check_all_matches(output: true)
2727

28-
assert_equal(5, bulk.count)
28+
assert_equal(6, bulk.count)
2929
end
3030

3131
test 'suggested_resources_bulk_checker' do

test/models/lookup_barcode_test.rb

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# frozen_string_literal: true
2+
3+
require 'test_helper'
4+
5+
# Exercises LookupBarcode#info against recorded VCR cassettes: one for a known barcode and one for a term
# that yields no record.
class LookupBarcodeTest < ActiveSupport::TestCase
  test 'metadata object is returned with expected fields' do
    VCR.use_cassette('barcode 39080027236626') do
      record = LookupBarcode.new.info('39080027236626')

      # Each of these keys must be present in the returned hash.
      %i[title date publisher authors link_resolver_url].each do |field|
        assert_includes(record.keys, field)
      end
    end
  end

  test 'link resolver URL returns a simple item URL' do
    VCR.use_cassette('barcode 39080027236626') do
      record = LookupBarcode.new.info('39080027236626')

      assert_equal(
        'https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma990002933430106761',
        record[:link_resolver_url]
      )
    end
  end

  test 'barcode not found' do
    VCR.use_cassette('barcode not found') do
      # A term with no matching record produces nil rather than a hash.
      assert_nil(LookupBarcode.new.info('this-is-not-a-barcode'))
    end
  end
end
end

test/models/metrics/algorithms_test.rb

+25
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,18 @@
1717
# suggested_resource_exact :integer
1818
# lcsh :integer
1919
# citation :integer
20+
# barcode :integer
2021
#
2122
require 'test_helper'
2223

2324
class Algorithms < ActiveSupport::TestCase
2425
# Monthlies
26+
# Generates the aggregation for the current month and expects exactly one barcode match.
test 'barcode counts are included in monthly aggregation' do
  monthly = Metrics::Algorithms.new.generate(DateTime.now)

  assert_equal(1, monthly.barcode)
end
31+
2532
test 'citation counts are included in monthly aggregation' do
2633
aggregate = Metrics::Algorithms.new.generate(DateTime.now)
2734

@@ -92,6 +99,11 @@ class Algorithms < ActiveSupport::TestCase
9299
# drop all searchevents to make math easier and minimize fragility over time as more fixtures are created
93100
SearchEvent.delete_all
94101

102+
barcode_expected_count = rand(1...100)
103+
barcode_expected_count.times do
104+
SearchEvent.create(term: terms(:barcode), source: 'test')
105+
end
106+
95107
citation_expected_count = rand(1...100)
96108
citation_expected_count.times do
97109
SearchEvent.create(term: terms(:citation), source: 'test')
@@ -129,6 +141,7 @@ class Algorithms < ActiveSupport::TestCase
129141

130142
aggregate = Metrics::Algorithms.new.generate(DateTime.now)
131143

144+
assert_equal barcode_expected_count, aggregate.barcode
132145
assert_equal citation_expected_count, aggregate.citation
133146
assert_equal doi_expected_count, aggregate.doi
134147
assert_equal issn_expected_count, aggregate.issn
@@ -139,6 +152,12 @@ class Algorithms < ActiveSupport::TestCase
139152
end
140153

141154
# Total
155+
# Generates the aggregation with no month argument and expects two barcode matches.
test 'barcode counts are included in total aggregation' do
  totals = Metrics::Algorithms.new.generate

  assert_equal(2, totals.barcode)
end
160+
142161
test 'citation counts are included in total aggregation' do
143162
aggregate = Metrics::Algorithms.new.generate
144163

@@ -197,6 +216,11 @@ class Algorithms < ActiveSupport::TestCase
197216
# drop all searchevents to make math easier and minimize fragility over time as more fixtures are created
198217
SearchEvent.delete_all
199218

219+
barcode_expected_count = rand(1...100)
220+
barcode_expected_count.times do
221+
SearchEvent.create(term: terms(:barcode), source: 'test')
222+
end
223+
200224
citation_expected_count = rand(1...100)
201225
citation_expected_count.times do
202226
SearchEvent.create(term: terms(:citation), source: 'test')
@@ -239,6 +263,7 @@ class Algorithms < ActiveSupport::TestCase
239263

240264
aggregate = Metrics::Algorithms.new.generate
241265

266+
assert_equal barcode_expected_count, aggregate.barcode
242267
assert_equal citation_expected_count, aggregate.citation
243268
assert_equal doi_expected_count, aggregate.doi
244269
assert_equal issn_expected_count, aggregate.issn

test/test_helper.rb

+5
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@
3838
header&.each do |redacted_text|
3939
interaction.filter!(redacted_text, '<REDACTED_NEL>')
4040
end
41+
42+
header = interaction.response&.headers&.[]('Set-Cookie')
43+
header&.each do |redacted_text|
44+
interaction.filter!(redacted_text, '<FAKE_COOKIE_DATA>')
45+
end
4146
end
4247
end
4348

0 commit comments

Comments
 (0)