1
1
# frozen_string_literal: true
2
2
3
- # == Schema Information
4
- #
5
- # Table name: detector_suggested_resources
6
- #
7
- # id :integer not null, primary key
8
- # title :string
9
- # url :string
10
- # phrase :string
11
- # fingerprint :string
12
- # created_at :datetime not null
13
- # updated_at :datetime not null
14
- #
15
-
16
3
require 'stringex/core_ext'
17
4
18
5
class Detector
19
- # Detector::SuggestedResource stores custom hints that we want to send to the
20
- # user in response to specific strings. For example, a search for "web of
21
- # science" should be met with our custom login link to Web of Science via MIT.
22
- class SuggestedResource < ApplicationRecord
23
- before_save :update_fingerprint
24
-
25
# Prefix for this model's database table name. The schema notes for this
# model name the table "detector_suggested_resources", i.e. this prefix
# followed by the pluralized class name.
#
# @return [String] The literal prefix 'detector_'.
def self.table_name_prefix
  'detector_'
end
28
-
29
# Recalculates the fingerprint from the current phrase and assigns it to this
# record. It is registered as a before_save hook so that stored fingerprints
# always agree with the phrase they were derived from.
#
# Takes no arguments. Callers should not rely on the return value (it is the
# newly assigned fingerprint, as with any assignment expression).
def update_fingerprint
  self.fingerprint = Detector::SuggestedResource.calculate_fingerprint(phrase)
end
34
-
35
# Implements the OpenRefine fingerprinting algorithm. See
# https://openrefine.org/docs/technical-reference/clustering-in-depth#fingerprint
#
# @param old_phrase [String, nil] A text string which needs to have its fingerprint calculated. This could either be
#   the "phrase" field on the SuggestedResource record, or an incoming search term received from a contributing
#   system. nil is treated as an empty string.
#
# @return [String] All distinct words in the input, downcased, normalized, and alphabetized, joined by single
#   spaces. Returns an empty string when the input contains no words.
def self.calculate_fingerprint(old_phrase)
  # The phrase column is nullable, so coerce nil to '' rather than letting
  # the before_save hook fail with NoMethodError.
  normalized = old_phrase.to_s.strip.downcase

  # Remove all punctuation and symbol characters from the string.
  normalized = normalized.gsub(/\p{P}|\p{S}/, '')

  # Normalize to ASCII (e.g. gödel and godel are likely intended to find the
  # same thing). Coercion to ASCII can introduce new symbols, so those are
  # stripped a second time.
  normalized = normalized.to_ascii.gsub(/\p{P}|\p{S}/, '')

  # Tokenize on whitespace, drop duplicate words, alphabetize, and rejoin.
  normalized.split.uniq.sort.join(' ')
end
67
-
68
# Replaces all current Detector::SuggestedResource records with a new set from an imported CSV.
#
# The delete and the inserts run inside a single database transaction, so a failure part-way through the import
# rolls back to the previous set of records instead of leaving the table empty or half-loaded.
#
# @note This method is called by the suggested_resource:reload rake task.
#
# @param input [CSV::Table] An imported CSV file containing all Suggested Resource records. The CSV file must have
#   at least three headers, named "Title", "URL", and "Phrase". Please note: these values are case sensitive.
#
# @raise [ArgumentError] If input is not a CSV::Table, or if any of the required headers is missing.
def self.bulk_replace(input)
  raise ArgumentError, 'Tabular CSV is required' unless input.instance_of?(CSV::Table)

  # Check the input's columns before touching any existing records.
  required_headers = %w[Title URL Phrase]
  missing_headers = required_headers - input.headers
  raise ArgumentError, "Some CSV columns missing: #{missing_headers}" unless missing_headers.empty?

  Detector::SuggestedResource.transaction do
    Detector::SuggestedResource.delete_all

    input.each do |line|
      # create! raises when a row cannot be saved, which aborts the
      # transaction rather than silently dropping the row.
      Detector::SuggestedResource.create!(title: line['Title'], url: line['URL'], phrase: line['Phrase'])
    end
  end
end
90
-
6
+ # Detector::SuggestedResource handles detections for SuggestedResource records.
7
+ class SuggestedResource
91
8
# Identify any SuggestedResource record whose pre-calculated fingerprint matches the fingerprint of the incoming
92
9
# phrase.
93
10
#
@@ -98,7 +15,7 @@ def self.bulk_replace(input)
98
15
#
99
16
# @return [ActiveRecord::Relation] The SuggestedResource records whose fingerprint matches that of the search term.
100
17
def self.full_term_match(phrase)
  # Fingerprint the incoming phrase, then select the top-level
  # SuggestedResource records joined to a fingerprints row with that value.
  fingerprint_value = Fingerprint.calculate(phrase)
  ::SuggestedResource.joins(:fingerprints).where(fingerprints: { value: fingerprint_value })
end
103
20
104
21
# Look up any matching Detector::SuggestedResource records, building on the full_term_match method. If a match is
0 commit comments