Skip to content

Commit c207615

Browse files
authored
Merge pull request #207 from MITLibraries/tco-141-regexable-suggested-resources
Adds Suggested Resource detection based on regex patterns
2 parents 19c74be + 643ed50 commit c207615

20 files changed

+341
-25
lines changed

app/controllers/demo_controller.rb

+1
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,6 @@ def detections
2323
@detections[:lcsh] = Detector::Lcsh.new(@searchterm).detections
2424
@detections[:standard_identifiers] = Detector::StandardIdentifiers.new(@searchterm).detections
2525
@detections[:suggested_resources] = Detector::SuggestedResource.full_term_match(@searchterm)
26+
@detections[:suggested_resources_patterns] = Detector::SuggestedResourcePattern.new(@searchterm)
2627
end
2728
end

app/graphql/types/detectors_type.rb

+12-1
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,19 @@ def standard_identifiers
2525
end
2626
end
2727

28+
# Term-based SuggestedResources take priority; pattern-based SuggestedResources are only returned when the
# term-based lookup yields nothing present.
def suggested_resources
  term_based = traditional_suggested_resources
  return term_based if term_based.present?

  pattern_based_suggested_resources
end
32+
33+
# Looks up term-based SuggestedResources for the current search phrase (@object) and reduces each match to the
# two fields exposed through this type.
#
# @return [Array<Hash>] one Hash with :title and :url per match; empty when nothing matches
def traditional_suggested_resources
  # `.map` matters here: a block handed directly to `full_term_match` (defined elsewhere) is not applied to the
  # matches, so the mapping has to be done on the method's result.
  Detector::SuggestedResource.full_term_match(@object).map do |suggested_resource|
    { title: suggested_resource.title, url: suggested_resource.url }
  end
end
38+
39+
# Runs the pattern-based detector against the current search phrase (@object) and reduces each detection to the
# two fields exposed through this type.
#
# @return [Array<Hash>] one Hash with :title and :url per detection; empty when nothing matches
def pattern_based_suggested_resources
  # Detections are Hashes (with :shortcode, :title and :url keys), so fields are read with [] rather than
  # method calls. `detections` is a plain reader, so the mapping must be an explicit `.map` on its result.
  Detector::SuggestedResourcePattern.new(@object).detections.map do |suggested_resource|
    { title: suggested_resource[:title], url: suggested_resource[:url] }
  end
end

app/models/detector/bulk_checker.rb

+12-7
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ class Detector
55
# singleton class to access it
66
# See also: `PatternChecker` for shared instance methods
77
module BulkChecker
8-
# This method is intended to be used for inspecting detections during development.
8+
# check_all_matches is intended to be used for inspecting detections during development.
99
# Assumptions include
1010
# - the Class including this module implements a `detections` method (either via `attr_reader` or as a method)
1111
# that is only populated for Terms in which it has made a detection
@@ -24,14 +24,19 @@ def check_all_matches(output: false)
2424
matches.push [t.phrase, d.detections]
2525
end
2626

27-
if Rails.env.development?
28-
Rails.logger.ap matches
29-
30-
Rails.logger.ap "Total Terms: #{Term.count}"
31-
Rails.logger.ap "Total Matches: #{count}"
32-
end
27+
log_summary(matches) if Rails.env.development?
3328

3429
matches if output
3530
end
31+
32+
# log_summary writes the information collected in check_all_matches to the Rails log at info level: the matches
# themselves, followed by the total number of Terms and the number of matches.
#
# @param matches [Array] an array of [phrase, detections] pairs
def log_summary(matches)
  logger = Rails.logger

  # NOTE(review): `ap` is not defined in this file; presumably it comes from a pretty-printing gem. Confirm that
  # its return value is what should end up in the log.
  logger.info(ap(matches))

  logger.info("Total Terms : #{Term.count}")
  logger.info("Total Matches: #{matches.count}")
end
3641
end
3742
end

app/models/detector/standard_identifiers.rb

-4
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,6 @@ class Detector
66
class StandardIdentifiers
77
attr_reader :detections
88

9-
def self.table_name_prefix
10-
'detector_'
11-
end
12-
139
# shared instance methods
1410
include Detector::PatternChecker
1511

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# frozen_string_literal: true
2+
3+
class Detector
  # Detector::SuggestedResourcePattern handles detections for patterns stored in our SuggestedPattern model
  class SuggestedResourcePattern
    # @return [Array<Hash>] one Hash (keys :shortcode, :title, :url) per SuggestedPattern that matched the phrase;
    #   empty when nothing matched
    attr_reader :detections

    # shared singleton methods
    extend Detector::BulkChecker

    # @param phrase [String] A string representation of a searchterm (not an actual Term object)
    def initialize(phrase)
      # Always an Array, so callers can treat "no match" and "some matches" uniformly.
      @detections = []
      check_patterns(phrase)
    end

    # check_patterns loops through all stored patterns from SuggestedPattern model, checks to see if they produce
    # matches for the incoming `phrase`, and if so creates a Hash with useful data
    #
    # @note Not using shared PatternChecker as we want to include additional data in the returned object
    # @param phrase [String]. A string representation of a searchterm (not an actual Term object)
    # @return primarily intended to add matches to @detections
    def check_patterns(phrase)
      matched = []

      SuggestedPattern.find_each do |sp|
        # match? is enough here: only whether the pattern matches is needed, not the captures.
        next unless Regexp.new(sp.pattern).match?(phrase)

        matched << { shortcode: sp.shortcode, title: sp.title, url: sp.url }
      end

      @detections = matched
    end

    # The record method will consult the set of regex-based detectors that are defined in
    # SuggestedPattern records. If any of them match, the match is registered as a Detection record.
    #
    # @note The Detection is keyed on term, detector and detector version only, so a term yields at most one
    #   Detection per detector version no matter how many patterns matched.
    #
    # @param term [Term] the Term whose phrase is checked
    # @return nil
    def self.record(term)
      return if Detector::SuggestedResourcePattern.new(term.phrase).detections.blank?

      Detection.find_or_create_by(
        term:,
        detector: Detector.where(name: 'SuggestedResourcePattern').first,
        detector_version: ENV.fetch('DETECTOR_VERSION', 'unset')
      )

      nil
    end
  end
end

app/models/metrics/algorithms.rb

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# suggested_resource_exact :integer
1818
# lcsh :integer
1919
# citation :integer
20+
# barcode :integer
2021
#
2122
module Metrics
2223
# Algorithms aggregates statistics for matches for all SearchEvents

app/models/suggested_pattern.rb

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# frozen_string_literal: true
2+
3+
# == Schema Information
4+
#
5+
# Table name: suggested_patterns
6+
#
7+
# id :integer not null, primary key
8+
# title :string not null
9+
# url :string not null
10+
# pattern :string not null
11+
# shortcode :string not null
12+
# created_at :datetime not null
13+
# updated_at :datetime not null
14+
#
15+
# SuggestedPattern pairs a regular-expression pattern (and a shortcode identifying it) with the title and url of
# the resource to suggest. All four attributes are required; pattern and shortcode must each be unique.
class SuggestedPattern < ApplicationRecord
  validates :title, :url, presence: true

  %i[pattern shortcode].each do |unique_attribute|
    validates unique_attribute, presence: true, uniqueness: true
  end
end

app/models/term.rb

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def record_detections
5555
Detector::Journal.record(self)
5656
Detector::Lcsh.record(self)
5757
Detector::SuggestedResource.record(self)
58+
Detector::SuggestedResourcePattern.record(self)
5859

5960
nil
6061
end

app/views/demo/view.html.erb

+12
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,15 @@
108108
<% else %>
109109
<p>No suggested resoure found.</p>
110110
<% end %>
111+
112+
<%# Pattern-based suggested resources: each detection is read as a Hash with :title and :url keys. %>
<h2>Suggested Resource (Patterns)</h2>
<% if @detections[:suggested_resources_patterns].detections.present? %>
  <p>Suggested resource patterns found.</p>
  <% @detections[:suggested_resources_patterns].detections.each do |sr| %>
    <h3><%= sr[:title] %></h3>
    <p>URL: <%= link_to(sr[:url], sr[:url]) %></p>
  <% end %>

<% else %>
  <p>No suggested resource patterns found.</p>
<% end %>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Creates the suggested_patterns table: four required string columns plus timestamps, with unique indexes on
# pattern and shortcode.
class AddSuggestedPatterns < ActiveRecord::Migration[7.2]
  def change
    create_table :suggested_patterns do |t|
      t.string :title, :url, :pattern, :shortcode, null: false

      t.timestamps
    end

    %i[pattern shortcode].each do |unique_column|
      add_index :suggested_patterns, unique_column, unique: true
    end
  end
end

db/schema.rb

+12-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

db/seeds.rb

+33
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
Detector.find_or_create_by(name: 'SuggestedResource')
4343
Detector.find_or_create_by(name: 'Citation')
4444
Detector.find_or_create_by(name: 'Barcode')
45+
Detector.find_or_create_by(name: 'SuggestedResourcePattern')
4546

4647
# DetectorCategories
4748
DetectorCategory.find_or_create_by(
@@ -85,4 +86,36 @@
8586
confidence: 0.2
8687
)
8788

89+
# Patterns for Suggested Resources
# Every seeded pattern points at the same standards guide; only the shortcode and its regex differ.
{
  'iec' => '(IEC|iec)(\\s)(\\d{5})',
  'asce' => '(ASCE|asce)(\\s)(\\d)',
  'ieee' => '(IEEE|ieee)\\s+(?:Std\\s+)?([PC]?[0-9]{3,4})',
  'iso' => '(ISO|iso)\\s(\\d{1,5})',
  'astm' => '(ASTM|astm)\\s'
}.each do |shortcode, pattern|
  SuggestedPattern.find_or_create_by(
    title: 'Looking for Standards?',
    url: 'https://libguides.mit.edu/standards',
    pattern:,
    shortcode:
  )
end
120+
88121
Rails.logger.info('Seeding DB complete')

test/fixtures/detectors.yml

+3
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,6 @@ journal:
3333

3434
suggestedresource:
3535
name: 'SuggestedResource'
36+
37+
suggestedresourcepattern:
38+
name: 'SuggestedResourcePattern'

test/fixtures/fingerprints.yml

+2
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,5 @@ web_of_knowledge:
5555
nobel_laureate:
5656
value: 'bawendi moungi'
5757

58+
astm:
59+
value: 'astm 1'

test/fixtures/suggested_patterns.yml

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# == Schema Information
2+
#
3+
# Table name: suggested_patterns
4+
#
5+
# id :integer not null, primary key
6+
# title :string not null
7+
# url :string not null
8+
# pattern :string not null
9+
# shortcode :string not null
10+
# created_at :datetime not null
11+
# updated_at :datetime not null
12+
#
13+
14+
astm:
15+
title: Looking for ASTM Standards?
16+
url: 'https://example.com/standards'
17+
pattern: '(ASTM|astm)\s'
18+
shortcode: astm

test/fixtures/terms.yml

+3
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,6 @@ nobel_laureate:
8383
fingerprint: nobel_laureate
8484
suggested_resource: nobel_laureate
8585

86+
astm:
87+
phrase: 'astm 1'
88+
fingerprint: astm
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# frozen_string_literal: true
2+
3+
require 'test_helper'
4+
5+
class Detector
  # Covers pattern matching and Detection recording for Detector::SuggestedResourcePattern
  class SuggestedResourcePatternTest < ActiveSupport::TestCase
    test 'pattern matches return as expected' do
      detector = SuggestedResourcePattern.new('astm standard thing and stuff')

      assert_predicate(detector.detections, :present?)
    end

    test 'no patterns detected return as expected' do
      detector = SuggestedResourcePattern.new('hello!')

      assert_not_predicate(detector.detections, :present?)
    end

    test 'record does relevant work' do
      term = terms('astm')

      assert_difference('Detection.count', 1) do
        Detector::SuggestedResourcePattern.record(term)
      end
    end

    test 'record does nothing when not needed' do
      term = terms('journal_nature_medicine')

      assert_no_difference('Detection.count') do
        Detector::SuggestedResourcePattern.record(term)
      end
    end

    test 'record respects changes to the DETECTOR_VERSION value' do
      term = terms('astm')

      # Create a relevant detection
      Detector::SuggestedResourcePattern.record(term)

      # Calling the record method again doesn't do anything, but does not error.
      assert_no_difference('Detection.count') do
        Detector::SuggestedResourcePattern.record(term)
      end

      # Calling the record method after DETECTOR_VERSION is incremented results in a new Detection
      ClimateControl.modify DETECTOR_VERSION: 'updated' do
        assert_difference('Detection.count', 1) do
          Detector::SuggestedResourcePattern.record(term)
        end
      end
    end
  end
end

test/models/detector/suggested_resource_test.rb

-12
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,5 @@
11
# frozen_string_literal: true
22

3-
# == Schema Information
4-
#
5-
# Table name: suggested_resources
6-
#
7-
# id :integer not null, primary key
8-
# title :string
9-
# url :string
10-
# phrase :string
11-
# fingerprint :string
12-
# created_at :datetime not null
13-
# updated_at :datetime not null
14-
#
153
require 'test_helper'
164

175
class Detector

test/models/metrics/algorithms_test.rb

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# suggested_resource_exact :integer
1818
# lcsh :integer
1919
# citation :integer
20+
# barcode :integer
2021
#
2122
require 'test_helper'
2223

0 commit comments

Comments
 (0)