Skip to content

Commit 3635472

Browse files
Reworking the event import worker
1 parent 320905d commit 3635472

File tree

9 files changed

+208
-31
lines changed

9 files changed

+208
-31
lines changed

Gemfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ gem "git", "~> 1.5"
2424
gem "faraday", "~> 2.9"
2525
gem "faraday_middleware-aws-sigv4", "~> 0.3.0"
2626
gem "faraday-excon"
27+
gem "uuid", "~> 2.3", ">= 2.3.9"
2728

2829
# This gem will allow us to write tests without the need for a database
2930
gem "activerecord-nulldb-adapter", "~> 1.1", ">= 1.1.1"

Gemfile.lock

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ GEM
5050
erubi (~> 1.11)
5151
rails-dom-testing (~> 2.2)
5252
rails-html-sanitizer (~> 1.6)
53+
active_model_serializers (0.10.15)
54+
actionpack (>= 4.1)
55+
activemodel (>= 4.1)
56+
case_transform (>= 0.2)
57+
jsonapi-renderer (>= 0.1.1.beta1, < 0.3)
5358
activejob (7.1.5.1)
5459
activesupport (= 7.1.5.1)
5560
globalid (>= 0.3.6)
@@ -107,6 +112,8 @@ GEM
107112
bundler-audit (0.9.2)
108113
bundler (>= 1.2.0, < 3)
109114
thor (~> 1.0)
115+
case_transform (0.2)
116+
activesupport
110117
concurrent-ruby (1.3.5)
111118
connection_pool (2.5.0)
112119
crass (1.0.6)
@@ -166,6 +173,7 @@ GEM
166173
reline (>= 0.4.2)
167174
jmespath (1.6.2)
168175
json (2.10.2)
176+
jsonapi-renderer (0.2.2)
169177
language_server-protocol (3.17.0.4)
170178
lint_roller (1.1.0)
171179
logger (1.7.0)
@@ -180,6 +188,8 @@ GEM
180188
loofah (2.24.0)
181189
crass (~> 1.0.2)
182190
nokogiri (>= 1.12.0)
191+
macaddr (1.7.2)
192+
systemu (~> 2.6.5)
183193
mail (2.8.1)
184194
mini_mime (>= 0.1.1)
185195
net-imap
@@ -335,6 +345,7 @@ GEM
335345
concurrent-ruby
336346
thor
337347
stringio (3.1.5)
348+
systemu (2.6.5)
338349
thor (1.3.2)
339350
timeout (0.4.3)
340351
tzinfo (2.0.6)
@@ -343,6 +354,8 @@ GEM
343354
unicode-emoji (~> 4.0, >= 4.0.4)
344355
unicode-emoji (4.0.4)
345356
uri (1.0.3)
357+
uuid (2.3.9)
358+
macaddr (~> 1.0)
346359
websocket-driver (0.7.7)
347360
base64
348361
websocket-extensions (>= 0.1.0)
@@ -355,6 +368,7 @@ PLATFORMS
355368
x86_64-linux
356369

357370
DEPENDENCIES
371+
active_model_serializers (~> 0.10.0)
358372
activerecord-nulldb-adapter (~> 1.1, >= 1.1.1)
359373
aws-sdk-sqs (~> 1.3)
360374
bootsnap
@@ -388,6 +402,7 @@ DEPENDENCIES
388402
sentry-rails
389403
sentry-ruby
390404
shoryuken (~> 4.0)
405+
uuid (~> 2.3, >= 2.3.9)
391406

392407
RUBY VERSION
393408
ruby 3.1.6p260

app/factories/event_factory.rb

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
module EventFactory
2+
# Creates and returns an event from the dequeued message from the events SQS queue.
3+
def self.create_from_sqs(message)
4+
now = Time.now.utc
5+
6+
Event.new(
7+
uuid: message["uuid"] || SecureRandom.uuid,
8+
subj_id: DoiUtilities.normalize_doi(message["subjId"]) || message["subjId"],
9+
obj_id: DoiUtilities.normalize_doi(message["objId"]) || message["objId"],
10+
source_id: message["sourceId"],
11+
aasm_state: "waiting",
12+
source_token: message["sourceToken"],
13+
created_at: now,
14+
updated_at: now,
15+
total: message["total"] || 1,
16+
occurred_at: message["occurred_at"] || now,
17+
message_action: "create",
18+
relation_type_id: message["relation_type_id"] || "references",
19+
subj: message["subj"].to_json,
20+
obj: message["obj"].to_json,
21+
license: message["license"] || "https://creativecommons.org/publicdomain/zero/1.0/",
22+
)
23+
end
24+
end
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# frozen_string_literal: true
2+
3+
# Handles setting the source and target doi data of an event when an event is created or updated.
4+
# Method is only invoked via a before_validation callback in the events model.
5+
6+
module RelationTypeHandler
7+
extend ActiveSupport::Concern
8+
9+
def set_source_and_target_doi!
10+
return if subj_id.blank? || obj_id.blank?
11+
12+
case relation_type_id
13+
when *REFERENCE_RELATION_TYPES
14+
self.source_doi = DoiUtilities.uppercase_doi_from_url(subj_id)
15+
self.target_doi = DoiUtilities.uppercase_doi_from_url(obj_id)
16+
self.source_relation_type_id = "references"
17+
self.target_relation_type_id = "citations"
18+
when *CITATION_RELATION_TYPES
19+
self.source_doi = DoiUtilities.uppercase_doi_from_url(obj_id)
20+
self.target_doi = DoiUtilities.uppercase_doi_from_url(subj_id)
21+
self.source_relation_type_id = "references"
22+
self.target_relation_type_id = "citations"
23+
when "unique-dataset-investigations-regular"
24+
self.target_doi = DoiUtilities.uppercase_doi_from_url(obj_id)
25+
self.target_relation_type_id = "views"
26+
when "unique-dataset-requests-regular"
27+
self.target_doi = DoiUtilities.uppercase_doi_from_url(obj_id)
28+
self.target_relation_type_id = "downloads"
29+
when "has-version"
30+
self.source_doi = DoiUtilities.uppercase_doi_from_url(subj_id)
31+
self.target_doi = DoiUtilities.uppercase_doi_from_url(obj_id)
32+
self.source_relation_type_id = "versions"
33+
self.target_relation_type_id = "version_of"
34+
when "is-version-of"
35+
self.source_doi = DoiUtilities.uppercase_doi_from_url(obj_id)
36+
self.target_doi = DoiUtilities.uppercase_doi_from_url(subj_id)
37+
self.source_relation_type_id = "versions"
38+
self.target_relation_type_id = "version_of"
39+
when "has-part"
40+
self.source_doi = DoiUtilities.uppercase_doi_from_url(subj_id)
41+
self.target_doi = DoiUtilities.uppercase_doi_from_url(obj_id)
42+
self.source_relation_type_id = "parts"
43+
self.target_relation_type_id = "part_of"
44+
when "is-part-of"
45+
self.source_doi = DoiUtilities.uppercase_doi_from_url(obj_id)
46+
self.target_doi = DoiUtilities.uppercase_doi_from_url(subj_id)
47+
self.source_relation_type_id = "parts"
48+
self.target_relation_type_id = "part_of"
49+
end
50+
end
51+
52+
REFERENCE_RELATION_TYPES = [
53+
"cites",
54+
"is-supplemented-by",
55+
"references",
56+
].freeze
57+
58+
CITATION_RELATION_TYPES = [
59+
"is-cited-by",
60+
"is-supplement-to",
61+
"is-referenced-by",
62+
].freeze
63+
end

app/models/event.rb

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# frozen_string_literal: true
22

33
class Event < ApplicationRecord
4+
include RelationTypeHandler
45
# include Modelable
56
# include Identifiable
67
# include Elasticsearch::Model
@@ -9,17 +10,16 @@ class Event < ApplicationRecord
910
attribute :uuid, :text
1011
attribute :subj_id, :text
1112
attribute :obj_id, :text
12-
attribute :source_id, :string
13-
attribute :aasm_state, :string # could we remove this
14-
attribute :state_event, :string # could we remove this
13+
attribute :aasm_state, :string
14+
attribute :state_event, :string
1515
attribute :callback, :text
1616
attribute :error_messages, :text
1717
attribute :source_token, :text
1818
attribute :created_at, :datetime
1919
attribute :updated_at, :datetime
2020
attribute :indexed_at, :datetime, default: -> { Time.zone.at(0) }
2121
attribute :occurred_at, :datetime
22-
attribute :message_action, :string, default: "create" # how is this set?
22+
attribute :message_action, :string, default: "create"
2323
attribute :subj, :text
2424
attribute :obj, :text
2525
attribute :total, :integer, default: 1
@@ -28,15 +28,19 @@ class Event < ApplicationRecord
2828
attribute :target_doi, :text
2929
attribute :source_relation_type_id, :string
3030
attribute :target_relation_type_id, :string
31+
attribute :relation_type_id, :string
3132

3233
# Validations
33-
validates :uuid, presence: true, uniqueness: {
34-
case_sensitive: false,
35-
length: { maximum: 36 },
36-
}
34+
validates :uuid, presence: true, uuid_format: true, uniqueness: { case_sensitive: false, length: { maximum: 36 } }
3735
validates :subj_id, presence: true
36+
validates :obj_id, presence: true
37+
validates :source_id, presence: true
38+
validates :source_token, presence: true
3839
validates :message_action, presence: true, length: { maximum: 191 }
3940
validates :created_at, presence: true
4041
validates :updated_at, presence: true
4142
validates :indexed_at, presence: true
43+
44+
# Callbacks
45+
before_validation :set_source_and_target_doi!
4246
end

app/utilities/doi_utilities.rb

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# frozen_string_literal: true
2+
3+
module DoiUtilities
4+
def self.normalize_doi(url)
5+
doi =
6+
Array(%r{\A(?:(http|https):/(/)?(dx\.)?(doi.org|handle.test.datacite.org)/)?(doi:)?(10\.\d{4,5}/.+)\z}
7+
.match(doi)).last
8+
9+
doi = doi.delete("\u200B").downcase if doi.present?
10+
11+
"https://doi.org/#{doi}" if doi.present?
12+
end
13+
14+
def self.uppercase_doi_from_url(url)
15+
if %r{\A(?:(http|https)://(dx\.)?(doi.org|handle.test.datacite.org)/)?(doi:)?(10\.\d{4,5}/.+)\z}
16+
.match?(url)
17+
18+
uri = Addressable::URI.parse(url)
19+
20+
uri.path.gsub(%r{^/}, "").upcase
21+
end
22+
end
23+
end
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# frozen_string_literal: true
2+
3+
class UuidFormatValidator < ActiveModel::EachValidator
4+
def validate_each(record, attribute, value)
5+
unless UUID.validate(value)
6+
record.errors.add(attribute, "#{value} is not a valid UUID")
7+
end
8+
end
9+
end

app/workers/event_import_worker.rb

Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,32 +8,77 @@ class EventImportWorker
88
def perform(sqs_message = nil, data = nil)
99
log_prefix = "[Events:EventImportWorker]"
1010

11-
if data.blank?
11+
event_data = event_data(data)
12+
13+
if event_data.nil?
1214
Rails.logger.info("#{log_prefix} Message data was blank")
1315
return
1416
end
1517

16-
log_identifier = "subj_id: #{data["subjId"]}, " \
17-
"obj_id: #{data["objId"]}, " \
18-
"source_id: #{data["sourceId"]}, " \
19-
"relation_type_id: #{data["relationTypeId"]}"
18+
log_identifier = log_identifier(event_data)
2019

2120
Rails.logger.info("#{log_prefix} Start of event message processing for #{log_identifier}")
22-
Rails.logger.info("#{log_prefix} Searching for event with #{log_identifier}")
21+
Rails.logger.info("#{log_prefix} Searching for event for #{log_identifier}")
2322

24-
event = Event.find_by(
25-
subj_id: data["subjId"],
26-
obj_id: data["objId"],
27-
source_id: data["sourceId"],
28-
relation_type_id: data["relationTypeId"],
29-
)
23+
event = find_event(event_data)
3024

31-
if event.blank?
32-
Rails.logger.info("#{log_prefix} Creating a new event with #{log_identifier}")
25+
if event.nil?
26+
create_event(event_data, log_prefix, log_identifier)
3327
else
34-
Rails.logger.info("#{log_prefix} Update an existing event with #{log_identifier}")
28+
update_event(event, event_data, log_prefix, log_identifier)
3529
end
3630

37-
Rails.logger.info("#{log_prefix} End of event message processing for #{log_identifier}")
31+
Rails.logger.info("#{log_prefix} Completed processing event for #{log_identifier}")
32+
end
33+
34+
private
35+
36+
# Returns the SQS event data as a hash.
37+
# Will return nil if either a data or attributes field is missing.
38+
def event_data(data)
39+
data_hash = JSON.parse(data)
40+
41+
data_hash.dig("data", "attributes")
42+
end
43+
44+
# Returns a string which serves as an identifier for this event import worker run.
45+
# Consists of the event subj_id, obj_id, source_id and relation_type_id.
46+
def log_identifier(event_data)
47+
"subj_id: #{event_data["subjId"]}, " \
48+
"obj_id: #{event_data["objId"]}, " \
49+
"source_id: #{event_data["sourceId"]}, " \
50+
"relation_type_id: #{event_data["relationTypeId"]}"
51+
end
52+
53+
# Searchs for an existing event using subj_id, obj_id, source_id and relation_type_id
54+
def find_event(event_data)
55+
Event.find_by(
56+
subj_id: event_data["subjId"],
57+
obj_id: event_data["objId"],
58+
source_id: event_data["sourceId"],
59+
relation_type_id: event_data["relationTypeId"],
60+
)
61+
end
62+
63+
def create_event(event_data, log_prefix, log_identifier)
64+
Rails.logger.info("#{log_prefix} Creating a new event for #{log_identifier}")
65+
66+
event = EventFactory.create_from_sqs(event_data)
67+
68+
if event.save
69+
Rails.logger.info("#{log_prefix} Event successfully created for #{log_identifier}")
70+
elsif event.errors.any?
71+
Rails.logger.error("#{log_prefix} Creating event failed for #{log_identifier}: #{event.errors.inspect}")
72+
end
73+
end
74+
75+
def update_event(event, event_data, log_prefix, log_identifier)
76+
Rails.logger.info("#{log_prefix} Update an existing event for #{log_identifier}")
77+
78+
if event.update(data)
79+
Rails.logger.info("#{log_prefix} Event successfully updated for #{log_identifier}")
80+
else
81+
Rails.logger.error("#{log_prefix} Updating event failed for #{log_identifier}: #{event.errors.inspect}")
82+
end
3883
end
3984
end

config/initializers/sentry.rb

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,3 @@
2525
event
2626
end
2727
end
28-
# Raven.configure do |config|
29-
# config.environments = ["stage", "production"]
30-
# config.dsn = ENV["SENTRY_DSN"]
31-
# config.release = "events:" + Events::Application::VERSION
32-
# config.sanitize_fields = Rails.application.config.filter_parameters.map(&:to_s)
33-
# config.logger = Rails.application.config.lograge.logger
34-
# end

0 commit comments

Comments
 (0)