Skip to content

Commit 68225af

Browse files
authored
Merge pull request #2269 from broadinstitute/development
Release 1.97.0
2 parents 9c8c739 + f01c12a commit 68225af

14 files changed

+371
-23
lines changed

app/controllers/api/v1/study_files_controller.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ def perform_update!(study_file)
388388
end
389389
end
390390

391-
if ['Expression Matrix', 'MM Coordinate Matrix'].include?(study_file.file_type) && !safe_file_params[:y_axis_label].blank?
391+
if ['Expression Matrix', 'MM Coordinate Matrix'].include?(study_file.file_type) && safe_file_params[:y_axis_label].present?
392392
# if user is supplying an expression axis label, update default options hash
393393
study.default_options[:expression_label] = safe_file_params[:y_axis_label]
394394
study.save
@@ -400,9 +400,9 @@ def perform_update!(study_file)
400400
end
401401
end
402402

403-
if safe_file_params[:upload].present? && !is_chunked ||
404-
safe_file_params[:remote_location].present? ||
405-
study_file.needs_raw_counts_extraction?
403+
if (safe_file_params[:upload].present? && !is_chunked) ||
404+
safe_file_params[:remote_location].present? ||
405+
study_file.needs_raw_counts_extraction?
406406
complete_upload_process(study_file, parse_on_upload)
407407
end
408408
end

app/lib/cluster_cache_service.rb

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,54 @@ def self.cache_study_defaults(study)
7474
Rails.logger.error "Error in caching defaults for #{study.accession}: (#{e.class.name}) #{e.message}"
7575
end
7676
end
77+
78+
# set the default annotation for a study to the most relevant annotation available,
79+
# such as cell_type__ontology_label or seurat_clusters
80+
def self.configure_default_annotation(study)
81+
return false if default_annotation_configured?(study)
82+
83+
best_available = best_available_annotation(study)
84+
return false if best_available.nil?
85+
86+
# use default_options[:annotation] as default_annotation has fallback logic and we want the 'configured' value
87+
existing_default = study.default_options[:annotation]
88+
return false if best_available == existing_default
89+
90+
study.default_options[:annotation] = best_available
91+
Rails.logger.info "Changing default annotation in #{study.accession} from " \
92+
"#{existing_default.presence || 'unassigned'} to #{best_available}"
93+
study.save(validate: false) # prevent validation errors for older studies
94+
log_props = {
95+
studyAccession: study.accession,
96+
default_annotation: best_available,
97+
previous_annotation: existing_default
98+
}
99+
MetricsService.log('study-default-annotation', log_props, study.user)
100+
end
101+
102+
# find the most relevant annotation to display as the default
103+
# will prioritize convention-based cell types, then other 'cell type'-like annotations,
104+
# followed by clustering algorithms and anything label/categorical in nature
105+
def self.best_available_annotation(study)
106+
annotations = DifferentialExpressionService.find_eligible_annotations(study)
107+
return nil if annotations.empty?
108+
109+
ontology = annotations.detect { |a| a[:annotation_name] == 'cell_type__ontology_label' }
110+
author_cell_type = annotations.detect { |a| a[:annotation_name] =~ /author.*cell.*type/i }
111+
cell_type = annotations.detect { |a| a[:annotation_name] =~ DifferentialExpressionService::CELL_TYPE_MATCHER }
112+
clustering = annotations.detect { |a| a[:annotation_name] =~ DifferentialExpressionService::CLUSTERING_MATCHER }
113+
category = annotations.detect { |a| a[:annotation_name] =~ DifferentialExpressionService::CATEGORY_MATCHER }
114+
best_avail = ontology || author_cell_type || cell_type || clustering || category
115+
116+
best_avail.present? ? [best_avail[:annotation_name], 'group', best_avail[:annotation_scope]].join('--') : nil
117+
end
118+
119+
# helper to determine if a user set the default annotation manually by checking HistoryTracker for events
120+
def self.default_annotation_configured?(study)
121+
study.history_tracks.detect do |track|
122+
track.original.dig('default_options', 'annotation').present? &&
123+
track.original.dig('default_options', 'annotation') != '' &&
124+
track.modified.dig('default_options', 'annotation') != track.original.dig('default_options', 'annotation')
125+
end.present?
126+
end
77127
end

app/lib/differential_expression_service.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@ class DifferentialExpressionService
55
CELL_TYPE_MATCHER = /cell.*type/i
66
# possible clustering algorithm results
77
CLUSTERING_MATCHER = /(clust|seurat|leiden|louvain|snn_res)/i
8+
# any categorical-type labels
9+
CATEGORY_MATCHER = /(categor|labels)/i
810
# union of all allowed annotations
9-
ALLOWED_ANNOTS = Regexp.union(CELL_TYPE_MATCHER, CLUSTERING_MATCHER)
11+
ALLOWED_ANNOTS = Regexp.union(CELL_TYPE_MATCHER, CLUSTERING_MATCHER, CATEGORY_MATCHER)
1012
# specific annotations to exclude from automation
1113
EXCLUDED_ANNOTS = /(enrichment__cell_type)/i
1214
# default quota for user-requested DE jobs
@@ -250,7 +252,7 @@ def self.find_eligible_annotations(study, skip_existing: false)
250252
annotations += metadata.map { |meta| { annotation_name: meta.name, annotation_scope: 'study' } }
251253
# special gotcha to remove 'cell_type' metadata annotation if 'cell_type__ontology_label' is present
252254
if annotations.detect { |annot| annot[:annotation_name] == 'cell_type__ontology_label' }.present?
253-
annotations.reject! { |annot| annot[:annotation_name] == 'cell_type'}
255+
annotations.reject! { |annot| annot[:annotation_name] == 'cell_type' }
254256
end
255257
cell_annotations = []
256258
groups_to_process = study.cluster_groups.select { |cg| cg.cell_annotations.any? }

app/lib/metrics_service.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,6 @@ def self.log(name, props={}, user=nil, request: nil)
123123
headers: headers,
124124
payload: post_body
125125
}
126-
127126
self.post_to_bard(params, user)
128127
end
129128

app/models/ingest_job.rb

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,8 @@ def cluster_name_by_file_type
557557

558558
# set the default annotation for the study, if not already set
559559
def set_default_annotation
560+
ClusterCacheService.configure_default_annotation(study)
561+
study.reload
560562
return if study.default_options[:annotation].present?
561563

562564
cell_metadatum = study.cell_metadata.keep_if(&:can_visualize?).first || study.cell_metadata.first
@@ -1062,9 +1064,29 @@ def anndata_summary_props
10621064
pipeline_args.detect { |c| c == study_file.id.to_s } &&
10631065
(pipeline_args & CORE_ACTIONS).any?
10641066
end
1067+
if previous_jobs.empty?
1068+
run = get_ingest_run
1069+
perftime = (TimeDifference.between(
1070+
event_timestamp(run.create_time), event_timestamp(run.update_time)
1071+
).in_seconds * 1000).to_i
1072+
status = run.status.state == "FAILED" ? 'failed' : 'success'
1073+
return {
1074+
perfTime: perftime,
1075+
fileName: study_file.name,
1076+
fileType: study_file.file_type,
1077+
fileSize: study_file.upload_file_size,
1078+
studyAccession: study.accession,
1079+
trigger: study_file.upload_trigger,
1080+
jobStatus: status,
1081+
numFilesExtracted: 0,
1082+
machineType: params_object.machine_type,
1083+
action: 'unknown',
1084+
exitCode: exit_code
1085+
}
1086+
end
10651087
# get total runtime from initial extract to final parse
1066-
initial_extract = previous_jobs.last
1067-
final_parse = previous_jobs.first
1088+
initial_extract = previous_jobs.min_by(&:create_time)
1089+
final_parse = previous_jobs.max_by(&:create_time)
10681090
start_time = event_timestamp(initial_extract.create_time)
10691091
end_time = event_timestamp(final_parse.update_time) # update_time is a proxy for last event timestamp
10701092
job_perftime = (TimeDifference.between(start_time, end_time).in_seconds * 1000).to_i
@@ -1079,7 +1101,7 @@ def anndata_summary_props
10791101
if job_status == 'failed'
10801102
first_failure = previous_jobs.reverse.detect { |job| client.job_error(job.name).present? }
10811103
args = client.get_job_command_line(job: first_failure)
1082-
error_action = args.detect {|c| BatchApiClient::FILE_TYPES_BY_ACTION.keys.include?(c.to_sym) }
1104+
error_action = args.detect { |c| BatchApiClient::FILE_TYPES_BY_ACTION.keys.include?(c.to_sym) }
10831105
code = client.exit_code_from_task(first_failure.name)
10841106
end
10851107
# count total number of files extracted

app/models/study.rb

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -752,10 +752,11 @@ def author
752752
before_validation :set_data_dir, :set_firecloud_workspace_name, on: :create
753753
after_validation :assign_accession, on: :create
754754
# before_save :verify_default_options
755-
after_create :make_data_dir, :set_default_participant, :check_bucket_read_access
755+
after_create :make_data_dir, :set_default_participant, :check_bucket_read_access, :log_study_creation
756756
before_destroy :ensure_cascade_on_associations
757757
after_destroy :remove_data_dir
758758
before_save :set_readonly_access
759+
after_update :log_study_state
759760

760761
# search definitions
761762
index({"name" => "text", "description" => "text"}, {background: true})
@@ -1836,12 +1837,61 @@ def delete_convention_data
18361837
end
18371838
end
18381839

1840+
## State tracking methods
1841+
18391842
def last_public_date
1840-
history_tracks.where('modified.public': true).order_by(created_at: :desc).first&.created_at
1843+
history_tracks.where('modified.public': true).order_by(created_at: :desc).last&.created_at
18411844
end
18421845

18431846
def last_initialized_date
1844-
history_tracks.where('modified.initialized': true).order_by(created_at: :desc).first&.created_at
1847+
history_tracks.where('modified.initialized': true).order_by(created_at: :desc).last&.created_at
1848+
end
1849+
1850+
# find the last time an attribute changed in the history
1851+
def last_change_for(field)
1852+
history_tracks.select { |h| h.modified.keys.include?(field.to_s) }.last
1853+
end
1854+
1855+
# determine if a field changed from a given value
1856+
def field_changed_from?(history_track, field, value)
1857+
return false if history_track.nil?
1858+
1859+
history_track.original[field] == value && history_track.modified[field] != value
1860+
end
1861+
1862+
# helper to determine if a study was just made public
1863+
def was_just_published?(cutoff: nil)
1864+
track = last_change_for(:public)
1865+
field_changed_from?(track, :public, false) && track.created_at >= (cutoff || 1.second.ago)
1866+
end
1867+
1868+
# helper to determine if a study was just initialized
1869+
def was_just_initialized?(cutoff: nil)
1870+
track = last_change_for(:initialized)
1871+
field_changed_from?(track, :initialized, false) && track.created_at >= (cutoff || 1.second.ago)
1872+
end
1873+
1874+
# basic Mixpanel props for study state tracking
1875+
def mixpanel_state_props
1876+
{
1877+
studyAccession: accession,
1878+
created_at:,
1879+
domain: user.email.split('@').last,
1880+
numCells: cell_count,
1881+
public:,
1882+
last_public_date:,
1883+
initialized:,
1884+
last_initialized_date:
1885+
}
1886+
end
1887+
1888+
def log_study_creation
1889+
MetricsService.log('study-creation', mixpanel_state_props, user)
1890+
end
1891+
1892+
# only log state when a study is published/initialized
1893+
def log_study_state
1894+
MetricsService.log('study-state', mixpanel_state_props, user) if was_just_published? || was_just_initialized?
18451895
end
18461896

18471897
private

bin/run_orch_smoke_test.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ DOCKER_IMAGE_NAME="gcr.io/broad-singlecellportal-staging/single-cell-portal"
1818
DOCKER_IMAGE_VERSION="development"
1919
PASSENGER_APP_ENV="test"
2020
NON_DOCKERIZED="false"
21+
# disable warnings about frozen literals
22+
export RUBYOPT=--disable-frozen-string-literal
2123

2224
while getopts "k:e:v:d" OPTION; do
2325
case $OPTION in
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
class SetRelevantDefaultAnnotations < Mongoid::Migration
2+
def self.up
3+
accessions = Study.where(queued_for_deletion: false).pluck(:accession)
4+
accessions.each do |accession|
5+
study = Study.find_by(accession:)
6+
begin
7+
ClusterCacheService.configure_default_annotation(study)
8+
rescue => e
9+
ErrorTracker.report_exception(e, nil,{ study: })
10+
end
11+
end
12+
end
13+
14+
def self.down
15+
# default annotations can be reset manually if needed
16+
end
17+
end

test/integration/external/study_validation_test.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,6 @@ class StudyValidationTest < ActionDispatch::IntegrationTest
130130
end
131131
puts "After #{seconds_slept} seconds, " + (example_files.values.map { |e| "#{e[:name]} is #{e[:object].parse_status}"}).join(", ") + '.'
132132

133-
study.reload
134-
135133
example_files.values.each do |e|
136134
e[:object].reload # address potential race condition between parse_status setting to 'failed' and DeleteQueueJob executing
137135
assert_equal 'failed', e[:object].parse_status, "Incorrect parse_status for #{e[:name]}"
@@ -140,6 +138,8 @@ class StudyValidationTest < ActionDispatch::IntegrationTest
140138
assert cached_file.present?, "Did not find cached file at #{e[:cache_location]} in #{study.bucket_id}"
141139
end
142140

141+
sleep(5)
142+
study.reload
143143
assert_equal 0, study.cell_metadata.size
144144
assert_equal 0, study.genes.size
145145
assert_equal 0, study.cluster_groups.size

test/lib/summary_stats_utils_test.rb

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -157,18 +157,21 @@ class SummaryStatsUtilsTest < ActiveSupport::TestCase
157157
end
158158

159159
test 'should get data retention policy report' do
160+
expected_studies = Study.where(detached: false, firecloud_project: FireCloudClient::PORTAL_NAMESPACE)
160161
client_mock = Minitest::Mock.new
161162
bucket_mock = Minitest::Mock.new
162-
created_at = DateTime.now.in_time_zone - 1.day
163-
bucket_mock.expect :nil?, false
164163
file_mock = Minitest::Mock.new
165-
2.times do
166-
bucket_mock.expect :next?, false
167-
file_mock.expect :next?, false
164+
created_at = DateTime.now.in_time_zone - 1.day
165+
expected_studies.each do |study|
166+
bucket_mock.expect :nil?, false
167+
2.times do
168+
bucket_mock.expect :next?, false
169+
file_mock.expect :next?, false
170+
end
171+
file_mock.expect :any?, true
172+
bucket_mock.expect :files, file_mock
173+
client_mock.expect :get_workspace_bucket, bucket_mock, [study.bucket_id]
168174
end
169-
file_mock.expect :any?, true
170-
bucket_mock.expect :files, file_mock
171-
client_mock.expect :get_workspace_bucket, bucket_mock, [String]
172175
FireCloudClient.stub :new, client_mock do
173176
SummaryStatsUtils.stub :bytes_and_last_created_for,
174177
[250.gigabytes, created_at],

0 commit comments

Comments
 (0)