Skip to content

Commit bee2cf7

Browse files
authored
Merge pull request #1358 from sanger/y24-096-publish-volume-tracking-message
Y24-096 publish volume tracking message
2 parents b9e116d + 268f4bd commit bee2cf7

File tree

18 files changed

+972
-5
lines changed

18 files changed

+972
-5
lines changed

Gemfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ git_source(:github) { |repo| "https://github.com/#{repo}.git" }
55

66
ruby '3.3.4'
77

8+
gem 'avro'
89
gem 'bootsnap', '>= 1.1.0', require: false # Reduces boot times through caching
910
gem 'bunny'
1011
gem 'exception_notification'
@@ -42,6 +43,7 @@ group :development, :test do
4243
gem 'simplecov', require: false
4344
gem 'simplecov-lcov', require: false
4445
gem 'sqlite3'
46+
gem 'webmock'
4547
end
4648

4749
gem 'flipper', '~> 0.25.0'

Gemfile.lock

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,12 @@ GEM
7575
minitest (>= 5.1)
7676
mutex_m
7777
tzinfo (~> 2.0)
78+
addressable (2.8.7)
79+
public_suffix (>= 2.0.2, < 7.0)
7880
amq-protocol (2.3.2)
7981
ast (2.4.2)
82+
avro (1.11.3)
83+
multi_json (~> 1.0)
8084
base64 (0.2.0)
8185
bigdecimal (3.1.8)
8286
bootsnap (1.18.3)
@@ -90,6 +94,9 @@ GEM
9094
coderay (1.1.3)
9195
concurrent-ruby (1.3.3)
9296
connection_pool (2.4.1)
97+
crack (1.0.0)
98+
bigdecimal
99+
rexml
93100
crass (1.0.6)
94101
database_cleaner (2.0.2)
95102
database_cleaner-active_record (>= 2, < 3)
@@ -126,6 +133,7 @@ GEM
126133
sanitize (< 7)
127134
globalid (1.2.1)
128135
activesupport (>= 6.1)
136+
hashdiff (1.1.0)
129137
i18n (1.14.5)
130138
concurrent-ruby (~> 1.0)
131139
io-console (0.7.2)
@@ -155,6 +163,7 @@ GEM
155163
mini_portile2 (2.8.7)
156164
minitest (5.24.1)
157165
msgpack (1.7.2)
166+
multi_json (1.15.0)
158167
mutex_m (0.2.0)
159168
mysql2 (0.5.6)
160169
net-imap (0.4.12)
@@ -181,6 +190,7 @@ GEM
181190
pry (>= 0.13.0)
182191
psych (5.1.2)
183192
stringio
193+
public_suffix (6.0.0)
184194
puma (6.4.2)
185195
nio4r (~> 2.0)
186196
racc (1.8.0)
@@ -314,6 +324,10 @@ GEM
314324
tzinfo (2.0.6)
315325
concurrent-ruby (~> 1.0)
316326
unicode-display_width (2.5.0)
327+
webmock (3.23.1)
328+
addressable (>= 2.8.0)
329+
crack (>= 0.3.2)
330+
hashdiff (>= 0.4.0, < 2.0.0)
317331
webrick (1.8.1)
318332
websocket-driver (0.7.6)
319333
websocket-extensions (>= 0.1.0)
@@ -325,6 +339,7 @@ PLATFORMS
325339
ruby
326340

327341
DEPENDENCIES
342+
avro
328343
bootsnap (>= 1.1.0)
329344
bunny
330345
byebug
@@ -354,6 +369,7 @@ DEPENDENCIES
354369
spring
355370
spring-watcher-listen
356371
sqlite3
372+
webmock
357373
yard
358374

359375
RUBY VERSION

app/exchanges/data_structure_builder.rb

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,24 +38,51 @@ def build_children(object, field)
3838
# * [constant] - Takes the constant and applies the method chain
3939
# to it e.g DateTime.now
4040
# * [array] - usually an array of fields
41-
def instance_value(object, field, parent) # rubocop:disable Metrics/MethodLength
41+
# * [self] - applies the method chain to the current (builder) object
42+
def instance_value(object, field, parent)
  # Resolve the field according to its declared type. A type that is not
  # listed here falls through the case and the method returns nil.
  case field[:type]
  when :string then field[:value]
  when :model then evaluate_field(object, field[:value])
  when :parent_model then evaluate_field(parent, field[:value])
  when :constant
    # The first dotted segment names the constant; whatever follows is the
    # method chain applied to it, e.g. 'DateTime.now'.
    constant_name, *chain = field[:value].split('.')
    evaluate_method_chain(constant_name.constantize, chain)
  when :array then build_children(object, field)
  when :self then evaluate_field(self, field[:value])
  end
end
59+
60+
# Evaluates a field expression against the given object.
# Expressions containing '&.' are split on that token and handed to
# evaluate_safe_navigation; all others are split on '.' and handed to
# evaluate_method_chain.
def evaluate_field(object, field_value)
  return evaluate_method_chain(object, field_value.split('.')) unless field_value.include?('&.')

  evaluate_safe_navigation(object, field_value.split('&.'))
end
5667

5768
# we need to do this via try as certain fields may be nil
5869
def evaluate_method_chain(object, chain)
  # Apply each method name in turn, feeding every result into the next call.
  # An empty chain returns the object itself.
  chain.reduce(object) do |receiver, method_name|
    receiver.try(:send, method_name)
  end
end
72+
73+
def evaluate_safe_navigation(object, chain)
  chain.reduce(object) do |current, segment|
    # Stop the walk (result: nil) as soon as the value so far is nil or false.
    break nil unless current

    # A segment that still contains '.' is an ordinary method chain.
    next evaluate_method_chain(current, segment.split('.')) if segment.include?('.')
    next current.try(:send, segment) unless current.is_a?(Hash)

    # Hash receivers are read by key: symbol key first, string key otherwise.
    symbol_key = segment.to_sym
    current.key?(symbol_key) ? current[symbol_key] : current[segment]
  end
end
6188
end
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# frozen_string_literal: true
2+
3+
module VolumeTracking
  # VolumeTracking::MessageBuilder
  # Creates a message in the correct structure for the warehouse
  class MessageBuilder < Message::Message
    # Returns the aliquot-derived values used to build the message.
    # The hash is computed on first call and memoized afterwards.
    #
    # Example of the resulting message:
    # {"limsId"=>"Traction",
    # "messageCreateDateUtc"=>Mon, 15 Jul 2024 15:16:54.877858000 UTC +00:00,
    # "messageUuid"=>"0a62ee15-bbf6-46f0-ba95-01d42622d076",
    # "recordedAt"=>Mon, 15 Jul 2024 15:16:54.867713000 UTC +00:00,
    # "volume"=>1.5, "concentration"=>10.0, "insertSize"=>100, "aliquotType"=>"primary",
    # "limsUuid"=>"", "sourceType"=>"library", "sourceBarcode"=>"TRAC-2-35805",
    # "sampleName"=>"Sample1", "usedByBarcode"=>"TRAC-2-35806", "usedByType"=>"pool"}
    #
    # @return [Hash] keys :source_type, :source_barcode, :sample_name,
    #   :used_by_type, :used_by_barcode and :lims_uuid
    def publish_data
      # Memoize the data
      return @publish_data if defined?(@publish_data)

      aliquot = object
      @publish_data = default_publish_data(aliquot)
                      .merge(source_publish_data(aliquot), used_by_publish_data(aliquot))
    end

    private

    # Values used when the aliquot's source / used_by types are not recognised.
    # NOTE(review): used_by_type defaults to the string 'nil', not nil or '' —
    # kept as is; confirm this is what the consumer expects.
    def default_publish_data(aliquot)
      { source_type: '', source_barcode: '', sample_name: '',
        used_by_type: 'nil', used_by_barcode: '', lims_uuid: aliquot.id.to_s }
    end

    # Source details; only filled in when the source is a Pacbio::Library.
    def source_publish_data(aliquot)
      return {} unless aliquot.source_type == 'Pacbio::Library'

      { source_type: 'library',
        source_barcode: aliquot.source.tube.barcode,
        sample_name: aliquot.source.sample_name }
    end

    # Used-by details for Pacbio::Well and Pacbio::Pool; empty for anything else.
    def used_by_publish_data(aliquot)
      case aliquot.used_by_type
      when 'Pacbio::Well'
        { used_by_type: 'well', used_by_barcode: well_barcode(aliquot.used_by) }
      when 'Pacbio::Pool'
        { used_by_type: 'pool', used_by_barcode: aliquot.used_by.tube.barcode }
      else
        {}
      end
    end

    # Identifies a well as "<sequencing kit box barcode>:<plate number>:<position>".
    def well_barcode(well)
      plate = well.plate
      "#{plate.sequencing_kit_box_barcode}:#{plate.plate_number}:#{well.position}"
    end
  end
end

app/messages/emq/encoder.rb

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# frozen_string_literal: true
2+
3+
require 'net/http' # Add this line to require Net::HTTP
4+
require 'avro'
5+
require 'fileutils'
6+
7+
module Emq
  # This class is responsible for encoding messages using an Avro schema
  # stored in the RedPanda registry
  class Encoder
    # NOTE(review): neither reader is assigned anywhere in this class, so both
    # return nil here; kept for interface compatibility.
    attr_reader :schema_config, :validate_obj

    # Initialize the encoder with the subject, version and registry URL
    # @param [String] subject the subject of the schema
    # @param [String] version the version of the schema
    # @param [String] registry_url the URL of the schema registry
    def initialize(subject, version, registry_url)
      @subject = subject
      @version = version
      @registry_url = registry_url
    end

    # Encode a message using the schema
    # Any error is logged and then re-raised to the caller.
    # @param [Hash] message the message to encode
    # @return [String] the encoded message
    def encode_message(message)
      schema = parse_schema(create_message_schema)
      stream = StringIO.new
      encoder = Avro::IO::BinaryEncoder.new(stream)
      encoder.write("\xC3\x01") # Avro single-object encoding marker
      # 8 byte schema fingerprint. 'Q<' forces little-endian byte order, as the
      # single-object encoding requires; plain 'Q' would follow the host CPU.
      encoder.write([schema.crc_64_avro_fingerprint].pack('Q<'))
      Avro::IO::DatumWriter.new(schema).write(message, encoder)
      stream.string
    rescue StandardError => e
      Rails.logger.error("Error validating volume tracking message: <#{e.message}>")
      raise
    end

    private

    # Parse the schema JSON, logging the offending schema when it is invalid
    # @param [String] schema_json the schema as a JSON string
    # @return [Avro::Schema] the parsed schema
    # @raise [Avro::SchemaParseError] when the schema cannot be parsed
    def parse_schema(schema_json)
      Avro::Schema.parse(schema_json)
    rescue Avro::SchemaParseError => e
      Rails.logger.error("Schema parsing error: <#{e.message}>. Schema: #{schema_json}")
      raise
    end

    # Create the message schema
    # @return [String] the schema for the message
    # @raise [KeyError] when the registry response carries no schema
    def create_message_schema
      # Prefer to use the cached schema if it exists.
      cache_file_path = "data/avro_schema_cache/#{@subject}_v#{@version}.avsc"
      if File.exist?(cache_file_path)
        Rails.logger.debug { "Using cached schema for #{@subject} v#{@version}" }
        return File.read(cache_file_path)
      end

      # Default to fetching the schema from the registry and caching it.
      Rails.logger.debug { "Fetching and caching schema for #{@subject} v#{@version}" }
      response = fetch_response("#{@registry_url}#{@subject}/versions/#{@version}")
      schema_str = JSON.parse(response.body)['schema']
      # Never cache an absent schema: an empty cache file would be picked up by
      # every later call instead of asking the registry again.
      raise KeyError, "No schema in registry response for #{@subject} v#{@version}" if schema_str.to_s.empty?

      # Ensure the directory exists
      FileUtils.mkdir_p(File.dirname(cache_file_path))
      File.write(cache_file_path, schema_str)
      schema_str
    end

    # Fetch the response from the URL
    # @param [String] uri_str the URL to fetch
    # @param [Integer] limit the number of redirects to follow
    # @return [Net::HTTPResponse] the response
    # @raise [IOError] when the redirect limit is exhausted
    def fetch_response(uri_str, limit = 10)
      raise IOError, 'Too many HTTP redirects' if limit.zero?

      response = Net::HTTP.get_response(URI.parse(uri_str))

      case response
      when Net::HTTPSuccess then response
      when Net::HTTPRedirection then fetch_response(response['location'], limit - 1)
      else
        # Raises the HTTP error matching the response status.
        response.error!
      end
    end
  end
end

app/messages/emq/publisher.rb

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# frozen_string_literal: true
2+
3+
# This module is responsible for sending messages to the EMQ which are validated
4+
# against an Avro schema stored in the RedPanda registry before being sent
5+
module Emq::Publisher
  class << self
    # Returns the memoized publishing job. One is created only when the bunny
    # configuration has 'enabled' set; otherwise nil is returned.
    def publish_job
      return @publish_job if instance_variable_defined?(:@publish_job)
      return unless Rails.configuration.bunny['enabled']

      @publish_job = Emq::PublishingJob.new
    end

    # Publish a message to the EMQ. Does nothing when there is no publishing job.
    def publish(aliquots, configuration, schema_key)
      publish_job&.publish(aliquots, configuration, schema_key)
    end
  end
end

app/messages/emq/publishing_job.rb

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# frozen_string_literal: true
2+
3+
require 'ostruct'
4+
5+
module Emq
  # This class is responsible for publishing messages to the EMQ which are validated
  # against an Avro schema stored in the RedPanda registry before being sent
  class PublishingJob
    attr_reader :bunny_config

    # The prefix for the key which contains the version of the Avro schema to use
    # by the message builder
    AVRO_SCHEMA_VERSION_KEY = 'avro_schema_version_'

    # Initialize the publishing job with the bunny configuration
    def initialize
      # Load the bunny configuration from the Rails configuration and convert it to an OpenStruct
      @bunny_config = self.class.deep_open_struct(Rails.configuration.bunny)
    end

    # Publish a message to the EMQ
    # @param [Object] objects the object or objects to publish
    # @param [Object] message_config the pipeline configuration to construct
    # the message to publish from the given object(s)
    # @param [String] schema_key the key of the schema to validate the message against
    # Note:-
    # The schema_key must exist within the subjects hash of the bunny configuration and
    # must also have a matching configuration within the pipeline settings.
    # (See the 'volume_tracking' section in config/pipelines/pacbio.yml for reference.)
    # Any messages published using publishing_job require a corresponding entry in the
    # pipeline configuration, identified by the schema key.
    #
    # Returns nil without publishing when the schema, its subject or version, or the
    # message builder configuration cannot be found.
    def publish(objects, message_config, schema_key) # rubocop:disable Metrics/AbcSize
      subject, version = schema_subject_and_version(schema_key)
      return if subject.nil? || version.nil?

      builder_config = find_builder_config(message_config, schema_key, version)
      return if builder_config.nil?

      builder_class = builder_config.message_class.to_s.constantize
      encoder = Emq::Encoder.new(subject, version, bunny_config.amqp.schemas.registry_url)
      sender = Emq::Sender.new(bunny_config.amqp.isg, subject, version)

      Array(objects).each do |object|
        # Build the message and keep only the part stored under the schema key;
        # objects whose message has no such part are skipped.
        payload = builder_class.new(object:, configuration: builder_config).content[schema_key]
        sender.send_message(encoder.encode_message(payload)) unless payload.nil?
      end
    end

    # recursively converts a nested hash into an OpenStruct,
    # allowing for dot notation access to hash keys and their values.
    # Anything that is not a Hash is returned unchanged.
    def self.deep_open_struct(obj)
      return obj unless obj.is_a?(Hash)

      OpenStruct.new(obj.transform_values { |val| deep_open_struct(val) }) # rubocop:disable Style/OpenStructUse
    end

    private

    # Look up the subject and version configured for the schema key
    # @param [String] schema_key the key of the schema in the subjects configuration
    # @return [Array] [subject, version]; both nil when the schema key is not configured
    def schema_subject_and_version(schema_key)
      schema = bunny_config.amqp.schemas.subjects[schema_key]
      [schema&.subject, schema&.version]
    end

    # Same as message_builder_config, but logs an error when nothing is found
    # @return [OpenStruct | nil] the message builder configuration, if any
    def find_builder_config(message_config, schema_key, version)
      config = message_builder_config(message_config, schema_key, version)
      return config unless config.nil?

      Rails.logger.error("Message builder configuration not found for schema key: #{schema_key} " \
                         "and version: #{version}")
      nil
    end

    # Get the message builder configuration for the schema key and version
    # @param [Object] message_config the pipeline configuration to get the message builder
    # configuration from
    # @param [String] schema_key the key of the schema to get the message builder configuration for
    # @param [Integer] version the version of the schema to get the message builder configuration
    # @return [OpenStruct | nil] the message builder configuration for the schema key and version
    def message_builder_config(message_config, schema_key, version)
      # The per-version builder configurations are read from the @children instance
      # variable of whatever message_config returns for the schema key.
      children = message_config.public_send(schema_key)&.instance_variable_get(:@children)
      return unless children

      builder_config = children["#{AVRO_SCHEMA_VERSION_KEY}#{version}"]
      return unless builder_config

      OpenStruct.new(builder_config) # rubocop:disable Style/OpenStructUse
    end
  end
end

0 commit comments

Comments
 (0)