Merge pull request #1433 from sanger/develop

Develop to master
sanger · Sep 19, 2024 · 3cecf15 · 3cecf15
2 parents 4648c9b + 421002b
commit 3cecf15
Show file tree

Hide file tree

Showing 43 changed files with 903 additions and 19 deletions.
diff --git a/.release-version b/.release-version
@@ -1 +1 @@
-7.12.4
+7.13.0
diff --git a/.ruby-version b/.ruby-version
@@ -1 +1 @@
-3.3.4
+3.3.5
diff --git a/Gemfile b/Gemfile
@@ -3,7 +3,7 @@
 source 'https://rubygems.org'
 git_source(:github) { |repo| "https://github.com/#{repo}.git" }
 
-ruby '3.3.4'
+ruby '3.3.5'
 
 gem 'avro'
 gem 'bootsnap', '>= 1.1.0', require: false # Reduces boot times through caching

diff --git a/Gemfile.lock b/Gemfile.lock
@@ -176,7 +176,7 @@ GEM
       mini_portile2 (~> 2.8.2)
       racc (~> 1.4)
     parallel (1.26.3)
-    parser (3.3.4.2)
+    parser (3.3.5.0)
       ast (~> 2.4.1)
       racc
     pry (0.14.2)
@@ -247,8 +247,7 @@ GEM
     regexp_parser (2.9.2)
     reline (0.5.10)
       io-console (~> 0.5)
-    rexml (3.3.6)
-      strscan
+    rexml (3.3.7)
     rspec-core (3.13.1)
       rspec-support (~> 3.13.0)
     rspec-expectations (3.13.3)
@@ -266,22 +265,21 @@ GEM
       rspec-mocks (~> 3.13)
       rspec-support (~> 3.13)
     rspec-support (3.13.1)
-    rubocop (1.65.1)
+    rubocop (1.66.1)
       json (~> 2.3)
       language_server-protocol (>= 3.17.0)
       parallel (~> 1.10)
       parser (>= 3.3.0.2)
       rainbow (>= 2.2.2, < 4.0)
       regexp_parser (>= 2.4, < 3.0)
-      rexml (>= 3.2.5, < 4.0)
-      rubocop-ast (>= 1.31.1, < 2.0)
+      rubocop-ast (>= 1.32.2, < 2.0)
       ruby-progressbar (~> 1.7)
       unicode-display_width (>= 2.4.0, < 3.0)
-    rubocop-ast (1.32.1)
+    rubocop-ast (1.32.3)
       parser (>= 3.3.1.0)
     rubocop-factory_bot (2.26.1)
       rubocop (~> 1.61)
-    rubocop-rails (2.26.0)
+    rubocop-rails (2.26.1)
       activesupport (>= 4.2.0)
       rack (>= 1.1)
       rubocop (>= 1.52.0, < 2.0)
@@ -313,7 +311,6 @@ GEM
       listen (>= 2.7, < 4.0)
       spring (>= 4)
     stringio (3.1.1)
-    strscan (3.1.0)
     thor (1.3.2)
     timeout (0.4.1)
     tzinfo (2.0.6)
@@ -328,7 +325,7 @@ GEM
     websocket-driver (0.7.6)
       websocket-extensions (>= 0.1.0)
     websocket-extensions (0.1.5)
-    yard (0.9.36)
+    yard (0.9.37)
     zeitwerk (2.6.18)
 
 PLATFORMS
@@ -368,7 +365,7 @@ DEPENDENCIES
   yard
 
 RUBY VERSION
-   ruby 3.3.4
+   ruby 3.3.5
 
 BUNDLED WITH
    2.5.9
diff --git a/app/controllers/v1/workflow_steps_controller.rb b/app/controllers/v1/workflow_steps_controller.rb
@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+
+module V1
+  # JSON:API controller for {WorkflowStep} resources.
+  # Defines no actions of its own; all behaviour is inherited from ApplicationController.
+  class WorkflowStepsController < ApplicationController
+  end
+end
diff --git a/app/controllers/v1/workflows_controller.rb b/app/controllers/v1/workflows_controller.rb
@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+
+module V1
+  # JSON:API controller for {Workflow} resources.
+  # Defines no actions of its own; all behaviour is inherited from ApplicationController.
+  class WorkflowsController < ApplicationController
+  end
+end
diff --git a/app/messages/emq/publishing_job.rb b/app/messages/emq/publishing_job.rb
@@ -73,8 +73,10 @@ def publish(objects, message_config, schema_key) # rubocop:disable Metrics/AbcSi
         # Log success message after successful publishing
         Rails.logger.info('Published volume tracking message to EMQ')
       rescue StandardError => e
-        # Raise an exception if any error occurs
-        raise "Failed to publish message to EMQ: #{e.message}"
+        # DO NOT Raise an exception if any error occurs; logs the error instead
+        # This is to prevent the job from failing and to allow the job to continue
+        # These logs can be monitored through Kibana
+        Rails.logger.error("Failed to publish message to EMQ: #{e.message}")
       end
     end
 

diff --git a/app/models/workflow.rb b/app/models/workflow.rb
@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+
+# A named workflow that owns its workflow steps; name must be present and unique (ignoring case).
+class Workflow < ApplicationRecord
+  include Pipelineable
+  has_many :workflow_steps, dependent: :destroy
+  accepts_nested_attributes_for :workflow_steps, allow_destroy: true
+
+  validates :name, presence: true, uniqueness: { case_sensitive: false }
+end
diff --git a/app/models/workflow_step.rb b/app/models/workflow_step.rb
@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+
+# A single step belonging to a workflow; code must be present and unique (ignoring case).
+class WorkflowStep < ApplicationRecord
+  belongs_to :workflow
+
+  validates :code, presence: true, uniqueness: { case_sensitive: false }
+end
diff --git a/app/pipelines/pipelines.rb b/app/pipelines/pipelines.rb
@@ -8,7 +8,8 @@ module Pipelines
   # In order to maintain consistent numbering, this has been pulled out into
   # a constant. Please do *not* remove entries from this list, as it could
   # result in legacy data being reassigned to the incorrect pipelines
-  NAMES = { pacbio: 0, ont: 1, saphyr: 2, qc_result: 3, reception: 4 }.freeze
+  NAMES = { pacbio: 0, ont: 1, saphyr: 2, qc_result: 3, reception: 4, extraction: 5, sample_qc: 6,
+            hic: 7, bio_nano: 8 }.freeze
   HANDLERS = {
     pacbio: Pacbio,
     ont: Ont,

diff --git a/app/resources/v1/workflow_resource.rb b/app/resources/v1/workflow_resource.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+module V1
+  # Provides a JSON:API representation of {Workflow}.
+  #
+  # For more information about JSON:API see the [JSON:API Specifications](https://jsonapi.org/format/)
+  # or look at the [JSONAPI::Resources](http://jsonapi-resources.com/) package for the service
+  # implementation of the JSON:API standard.
+  class WorkflowResource < JSONAPI::Resource
+    # @!attribute [rw] name
+    #   @return [String] the name of the workflow
+    # @!attribute [rw] pipeline
+    #   @return [String] the pipeline of the workflow
+    attributes :name, :pipeline
+
+    # Expose the workflow's steps as a to-many relationship
+    has_many :workflow_steps
+  end
+end
diff --git a/app/resources/v1/workflow_step_resource.rb b/app/resources/v1/workflow_step_resource.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+module V1
+  # Provides a JSON:API representation of {WorkflowStep}.
+  #
+  # For more information about JSON:API see the [JSON:API Specifications](https://jsonapi.org/format/)
+  # or look at the [JSONAPI::Resources](http://jsonapi-resources.com/) package for the service
+  # implementation of the JSON:API standard.
+  class WorkflowStepResource < JSONAPI::Resource
+    # @!attribute [rw] code
+    #   @return [String] the code of the workflow step
+    # @!attribute [rw] stage
+    #   @return [String] the stage of the workflow step
+    attributes :code, :stage
+
+    # Expose the step's workflow as a to-one relationship
+    has_one :workflow
+  end
+end
diff --git a/compile-build b/compile-build
@@ -25,7 +25,8 @@ tar \
   --exclude='./tmp' \
   --exclude='./vendor/bundle' \
   --exclude='release.tar.gz' \
+  --exclude='./documentation' \
   -zcvf /tmp/release.tar.gz ./
 mv /tmp/release.tar.gz ./release.tar.gz
 echo 'Release complete!'
-echo `pwd`/release.tar.gz
+echo `pwd`/release.tar.gz
diff --git a/config/routes.rb b/config/routes.rb
@@ -16,6 +16,10 @@
     jsonapi_resources :tag_sets, only: %i[index create update destroy]
     jsonapi_resources :tags,     only: %i[index create update destroy]
 
+    jsonapi_resources :workflows, only: [:index] do
+      jsonapi_resources :workflow_steps, only: [:index]
+    end
+
     namespace :saphyr do
       jsonapi_resources :runs,          only: %i[index create show update destroy]
       jsonapi_resources :chips,         only: %i[index create show update destroy]

diff --git a/db/migrate/20240909162843_create_workflows.rb b/db/migrate/20240909162843_create_workflows.rb
@@ -0,0 +1,11 @@
+class CreateWorkflows < ActiveRecord::Migration[7.2]
+  def change
+    create_table :workflows do |t|
+      t.string :name
+      t.integer :pipeline
+
+      t.timestamps
+    end
+    add_index :workflows, :name, unique: true
+  end
+end
diff --git a/db/migrate/20240909163449_create_workflow_steps.rb b/db/migrate/20240909163449_create_workflow_steps.rb
@@ -0,0 +1,12 @@
+class CreateWorkflowSteps < ActiveRecord::Migration[7.2]
+  def change
+    create_table :workflow_steps do |t|
+      t.references :workflow, null: false, foreign_key: true
+      t.string :code
+      t.string :stage
+
+      t.timestamps
+    end
+    add_index :workflow_steps, :code, unique: true
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema[7.1].define(version: 2024_07_26_130146) do
+ActiveRecord::Schema[7.2].define(version: 2024_09_09_163449) do
   create_table "aliquots", charset: "utf8mb3", force: :cascade do |t|
     t.float "volume"
     t.float "concentration"
@@ -500,6 +500,24 @@
     t.index ["plate_id"], name: "index_wells_on_plate_id"
   end
 
+  create_table "workflow_steps", charset: "utf8mb3", force: :cascade do |t|
+    t.bigint "workflow_id", null: false
+    t.string "code"
+    t.string "stage"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["code"], name: "index_workflow_steps_on_code", unique: true
+    t.index ["workflow_id"], name: "index_workflow_steps_on_workflow_id"
+  end
+
+  create_table "workflows", charset: "utf8mb3", force: :cascade do |t|
+    t.string "name"
+    t.integer "pipeline"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["name"], name: "index_workflows_on_name", unique: true
+  end
+
   add_foreign_key "ont_flowcells", "ont_pools"
   add_foreign_key "ont_flowcells", "ont_runs"
   add_foreign_key "ont_requests", "data_types"
@@ -517,4 +535,5 @@
   add_foreign_key "qc_results", "qc_assay_types"
   add_foreign_key "qc_results", "qc_receptions"
   add_foreign_key "requests", "receptions"
+  add_foreign_key "workflow_steps", "workflows"
 end
diff --git a/documentation/multiplexing/docs/architectural-overview.md b/documentation/multiplexing/docs/architectural-overview.md
@@ -0,0 +1,60 @@
+# Architectural Overview
+
+As mentioned in the [Multiplexing process overview](index.md), the multiplexing architecture is mainly based around the creation and use of pools. This document will provide an overview of the architecture of the multiplexing system, including the responsibilities of each component and how they interact with each other with a focus on pools.
+
+## Entity Relationship Diagram
+
+In order to understand the architecture of the multiplexing system, it is important to understand the entities involved in the system. The following diagram shows the relevant entities and their relationships.
+
+!!! note
+
+    While both PacBio and ONT Traction pipelines support multiplexing and pooling, the entities and relationships shown in the diagram are specific to the PacBio Traction pipeline. The main difference between the two is the use of polymorphic aliquots and the ability to use premade libraries in the PacBio pipeline.
+
+<figure markdown="span">
+    ![ERD](img/pacbio-pool-erd.png)
+    <figcaption>Entities used for Multiplexing/Pooling in PacBio Traction</figcaption>
+</figure>
+
+### Understanding the entities
+
+**TagSet**: Represents a set of tags that are used to tag samples in a pool. This entity contains information about the tag set, such as the name and the pipeline it is used in. Only one tag set is allowed to be used per pool.
+
+**Tag**: Represents a tag that is used to tag a sample in a pool. A tag belongs to a tag set and can be used in multiple pools. This entity contains information about the tag, such as the oligo/sequence. Tag oligos are unique within a tag set.
+
+**Tube**: A wrapper entity that represents a physical tube and gives libraries and pools their Traction IDs (barcodes). A tube can have one item, typically a sample, library or pool.
+
+**Pacbio::Request**: Represents an instance of a sample in Traction. This is the entity that is used to link back to the actual sample to retrieve related data like sample name. It also contains extra metadata about the sample, such as the source barcode, the library type and the cost code.
+
+**Pacbio::Library**: Represents a library that is created from a request (sample). A library can only have one request, whereas a request can belong to many libraries. This entity contains information about the library, such as the volume, concentration, insert size, and library kit.
+
+**Pacbio::Pool**: Represents a pool that is created from one or more libraries or requests (samples). A pool can have many libraries and a library can belong to many pools. This entity contains information about the pool, such as the total volume, concentration, insert size. It relates to libraries and requests (samples) via the `aliquot` entity using polymorphism.
+
+**Aliquot**: Represents a polymorphic entity that describes a piece of something that has been used somewhere. The `source` is a polymorphic representation of where it has come from, typically a request or library. The `used_by` is a polymorphic representation of where it has been used, typically a pool or well. An Aliquot can have one of two types, `primary` and `derived`. A `primary` aliquot is one that represents an entity's initial state, such as initial volume, whereas a `derived` aliquot is one that is created from a primary aliquot and will have a volume of how much has been 'used' in that instance.
+
+**Pacbio::Well**: Represents a well that is used on a sequencing plate. A well typically has one pool or library but can have any number of pools and libraries via aliquot polymorphism. This entity contains information about the well, such as the column and row from the plate it is on. It can belong to just one plate.
+
+**Pacbio::Plate**: Represents a plate that is used in a sequencing run. A plate can have many wells. This entity contains information about the plate, such as the plate number and the plate sequencing kit box barcode.
+
+**Pacbio::Run**: Represents a sequencing run that is set up in Traction. A run can have any number of plates, typically one or two. This entity contains information about the run, such as the run name, the sequencing kit used, the state and the system used.
+
+### Using the entities
+
+Using the entities above, an illustrated simple example of a multiplexed pool is shown below with some example data:
+
+<figure markdown="span">
+    ![Multiplexed Pool Example](img/multiplexed-pool-example.png)
+    <figcaption>Example of a Multiplexed Pool</figcaption>
+</figure>
+
+Some important points to note about the example:
+
+- The `used_by` and `source` relationships shown coming from the `Aliquot` entity are polymorphic relationships stored as fields in `Aliquot`. The `source` is the entity that the aliquot has come from, and the `used_by` is the entity that the aliquot has been used in.
+- Aliquots can have a source of either a `Pacbio::Library` or a `Pacbio::Request` during pool creation.
+- A pool has two types of aliquot, a single `primary` and multiple `derived`. Primary aliquots are created from the pool and match the pool's total volume and concentration and act as the initial state of the pool. Derived aliquots are created from their source (library or request) and are `used_by` the pool to represent how much of the source has been used by the pool.
+- `Pacbio::Library` records have their own tube as they are created before this pooling process and assigned their own barcode.
+- `Pacbio::Library` records link back to their sample through their own `Pacbio::Request` but it is not shown here for simplicity.
+- Pacbio::Request parent data is not shown here but it would be used to link back to the actual sample data through an imported plate or tube.
+
+## Architectural decisions
+
+PacBio pooling used to be the same as ONT pooling in Traction. Pools could only support requests in the UI, and then in traction-service libraries would be created in the background automatically from those requests upon pool creation. Pools would also be directly related to libraries instead of relating through aliquots. This was changed to make pools more flexible and allow pre-made libraries to be added to pools in the UI, as this is more closely aligned with what the lab does in practice. The current approach of using aliquots also gives us a flexible and extensible way to manage pools. In the future we could add other entities to be used in pools, such as other pools.
diff --git a/documentation/multiplexing/docs/img/favicon.ico b/documentation/multiplexing/docs/img/favicon.ico
diff --git a/documentation/multiplexing/docs/img/multiplexed-pool-example.png b/documentation/multiplexing/docs/img/multiplexed-pool-example.png
diff --git a/documentation/multiplexing/docs/img/multiplexing-process-map.png b/documentation/multiplexing/docs/img/multiplexing-process-map.png
diff --git a/documentation/multiplexing/docs/img/pacbio-pool-edit.png b/documentation/multiplexing/docs/img/pacbio-pool-edit.png
diff --git a/documentation/multiplexing/docs/img/pacbio-pool-erd.png b/documentation/multiplexing/docs/img/pacbio-pool-erd.png
diff --git a/documentation/multiplexing/docs/img/pacbio-pool-new-invalid-pool.png b/documentation/multiplexing/docs/img/pacbio-pool-new-invalid-pool.png
diff --git a/documentation/multiplexing/docs/img/pacbio-pool-new-pool-created.png b/documentation/multiplexing/docs/img/pacbio-pool-new-pool-created.png
diff --git a/documentation/multiplexing/docs/img/pacbio-pool-new-pooled-samples.png b/documentation/multiplexing/docs/img/pacbio-pool-new-pooled-samples.png
diff --git a/documentation/multiplexing/docs/img/pacbio-pool-new-scan-labware.png b/documentation/multiplexing/docs/img/pacbio-pool-new-scan-labware.png
diff --git a/documentation/multiplexing/docs/img/pacbio-pool-new-select-samples-table.png b/documentation/multiplexing/docs/img/pacbio-pool-new-select-samples-table.png
diff --git a/documentation/multiplexing/docs/img/pacbio-pool-new-select-samples.png b/documentation/multiplexing/docs/img/pacbio-pool-new-select-samples.png
diff --git a/documentation/multiplexing/docs/img/pacbio-pool-new-tag-selection.png b/documentation/multiplexing/docs/img/pacbio-pool-new-tag-selection.png
diff --git a/documentation/multiplexing/docs/img/pacbio-pool-new.png b/documentation/multiplexing/docs/img/pacbio-pool-new.png
diff --git a/documentation/multiplexing/docs/img/pacbio-pools-index.png b/documentation/multiplexing/docs/img/pacbio-pools-index.png
diff --git a/documentation/multiplexing/docs/index.md b/documentation/multiplexing/docs/index.md
@@ -0,0 +1,35 @@
+# Multiplexing process overview
+
+## Introduction
+
+Traction represents the multiplexing process through the creation and use of pools. A pool is a collection of libraries (tagged samples) that are combined together for sequencing. Each library/sample in the pool is tagged with a unique tag from a given tag set. The pool itself has some metadata about what it contains, namely: template prep kit box barcode, total volume, concentration and insert size. Pools are used in sequencing runs as single entities whilst representing the individual libraries within the pool. Once a sequencing run has been set up, a sample sheet may be generated where the pool's contents are then represented in a single flowcell/well containing all the pool's samples and library data.
+
+A visual representation of the multiplexing process is shown below:
+
+![process](img/multiplexing-process-map.png)
+
+## Basic process
+
+The basic process of multiplexing in Traction is as follows:
+
+1. Samples are imported into Traction and requests are created for them.
+    - Typically imported via plates or tubes from Sequencescape.
+2. Libraries are created from the requests.
+    - Libraries are assigned metadata including volume, concentration, insert size and library kit.
+    - A tag may be assigned to the libraries at this stage.
+3. Pools are created from the libraries.
+    - Users specify which libraries they would like to use in a pool.
+    - Requests can also be added to the pool directly from the plates/tubes they were imported on.
+    - A single tag set is used and its tags are issued to all libraries in the pool. If there is more than one library in the pool, each library must be tagged with a unique tag from the tag set.
+    - Pool metadata includes template prep kit box barcode, total volume, concentration and insert size.
+4. Pools are used in sequencing runs.
+    - Pools are added to wells on plates for sequencing.
+5. A sample sheet is generated.
+    - The sample sheet contains the pool's contents and metadata.
+    - The sample sheet is used to set up the sequencing run on the sequencing machines.
+
+## Alternative multiplexing strategies
+
+Multiplexing is typically done before run setup, during the pooling process, instead of during run setup by using multiple libraries in each well, because it allows for more flexibility in the lab and a better user experience. Doing it upfront in the pooling process means we can be sure there are no tag clashes, that the data is correct and it reduces the risk of errors during run setup.
+
+However, Traction also supports pooling at the sequencing run level where multiple pools or libraries are combined together in a single flowcell/well in a run. This is generally less common as it is more complex, requiring consistent tagging across multiple pools/libraries in order to work. But it is supported in Traction and can be used if required.