Skip to content

Commit 7033caf

Browse files
authored
Merge pull request #2232 from broadinstitute/jb-raw-slot-ux
Adding `raw_location` for AnnData files (SCP-5961)
2 parents 2c40840 + f25d03e commit 7033caf

16 files changed

+232
-68
lines changed

app/controllers/api/v1/study_files_controller.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -749,7 +749,7 @@ def study_file_params
749749
],
750750
expression_file_info_attributes: [
751751
:_id, :_destroy, :library_preparation_protocol, :units, :biosample_input_type, :modality, :is_raw_counts,
752-
raw_counts_associations: []
752+
:raw_location, raw_counts_associations: []
753753
],
754754
heatmap_file_info_attributes: [:id, :_destroy, :custom_scaling, :color_min, :color_max, :legend_label],
755755
cluster_form_info_attributes: [
@@ -758,8 +758,8 @@ def study_file_params
758758
:external_link_description, spatial_cluster_associations: []
759759
],
760760
metadata_form_info_attributes: [:_id, :use_metadata_convention, :description],
761-
extra_expression_form_info_attributes: [:_id, :taxon_id, :description, :y_axis_label],
762-
ann_data_file_info_attributes: [:_id, :reference_file, :data_fragments],
761+
extra_expression_form_info_attributes: [:_id, :taxon_id, :description, :y_axis_label, :raw_location],
762+
ann_data_file_info_attributes: [:_id, :reference_file, :data_fragments, :raw_location],
763763
differential_expression_file_info_attributes: [
764764
:_id, :clustering_association, :annotation_name, :annotation_scope, :computational_method,
765765
:gene_header, :group_header, :comparison_group_header,

app/javascript/components/upload/AnnDataExpressionStep.jsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ const DEFAULT_NEW_PROCESSED_FILE = {
1010
is_raw_counts: false,
1111
biosample_input_type: 'Whole cell',
1212
modality: 'Transcriptomic: unbiased',
13+
raw_location: '',
1314
raw_counts_associations: []
1415
},
1516
file_type: 'Expression Matrix'

app/javascript/components/upload/ExpressionFileForm.jsx

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ import ExpandableFileForm from './ExpandableFileForm'
88

99
import { TextFormField } from './form-components'
1010
import { findBundleChildren, validateFile } from './upload-utils'
11+
import { faQuestionCircle } from '@fortawesome/free-solid-svg-icons'
12+
import { OverlayTrigger, Popover } from 'react-bootstrap'
13+
import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'
1114

1215
const REQUIRED_FIELDS = [{ label: 'species', propertyName: 'taxon_id' },
1316
{ label: 'Biosample input type', propertyName: 'expression_file_info.biosample_input_type' },
@@ -19,6 +22,9 @@ const RAW_COUNTS_REQUIRED_FIELDS = REQUIRED_FIELDS.concat([{
1922
const PROCESSED_ASSOCIATION_FIELD = [
2023
{ label: 'Associated raw counts files', propertyName: 'expression_file_info.raw_counts_associations' }
2124
]
25+
const RAW_LOCATION_FIELD = [
26+
{ label: 'Raw count data location', propertyName: 'raw_location' },
27+
]
2228

2329
/** renders a form for editing/uploading an expression file (raw or processed) and any bundle children */
2430
export default function ExpressionFileForm({
@@ -49,6 +55,10 @@ export default function ExpressionFileForm({
4955
if (rawCountsRequired && !isRawCountsFile ) {
5056
requiredFields = requiredFields.concat(PROCESSED_ASSOCIATION_FIELD)
5157
}
58+
const requireLocation = (rawCountsRequired || isRawCountsFile) && isAnnDataExperience
59+
if (requireLocation) {
60+
requiredFields = requiredFields.concat(RAW_LOCATION_FIELD)
61+
}
5262
const validationMessages = validateFile({ file, allFiles, allowedFileExts, requiredFields, isAnnDataExperience })
5363

5464
const associatedRawCounts = !isAnnDataExperience && file.expression_file_info.raw_counts_associations.map(id => ({
@@ -61,9 +71,45 @@ export default function ExpressionFileForm({
6171
setShowRawCountsUnits(rawCountsVal)
6272
}
6373

74+
/** create the tooltip and message for the .obsm key name section */
75+
function rawSlotMessage() {
76+
const rawSlotToolTip = <span>
77+
<OverlayTrigger
78+
trigger={['hover', 'focus']}
79+
rootClose placement="top"
80+
delayHide={1500}
81+
overlay={rawSlotHelpContent()}>
82+
<span> Raw count data location * <FontAwesomeIcon icon={faQuestionCircle}/></span>
83+
</OverlayTrigger>
84+
</span>
85+
86+
return <span >
87+
{rawSlotToolTip}
88+
</span>
89+
}
90+
91+
/** gets the popup message to describe .obsm keys */
92+
function rawSlotHelpContent() {
93+
const layersLink = <a href="https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.layers.html"
94+
target="_blank">layers</a>
95+
const rawLink = <a href="https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.raw.html"
96+
target="_blank">.raw</a>
97+
return <Popover id="cluster-obsm-key-name-popover" className="tooltip-wide">
98+
<div>
99+
Location of raw count data in your AnnData file. This can be the raw slot ({rawLink}) or the name of a layer in
100+
the {layersLink} section.
101+
</div>
102+
</Popover>
103+
}
104+
64105
return <ExpandableFileForm {...{
65-
file, allFiles, updateFile, saveFile,
66-
allowedFileExts, deleteFile, validationMessages, bucketName, isInitiallyExpanded, isAnnDataExperience
106+
file,
107+
allFiles,
108+
updateFile,
109+
saveFile,
110+
allowedFileExts,
111+
deleteFile,
112+
validationMessages, bucketName, isInitiallyExpanded, isAnnDataExperience
67113
}}>
68114
{!isAnnDataExperience &&
69115
<div className="form-group">
@@ -123,25 +169,32 @@ export default function ExpressionFileForm({
123169
{ isAnnDataExperience &&
124170
<div className="row">
125171
<div className="form-radio col-sm-4">
126-
<label className="labeled-select">I have raw count data in the <strong>adata.raw</strong> slot</label>
172+
<label className="labeled-select">I have raw count data</label>
127173
<label className="sublabel">
128174
<input type="radio"
129175
name={`anndata-raw-counts-${file._id}`}
130176
value="true"
131177
checked={isRawCountsFile}
132-
onChange={e => toggleIsRawCounts(true) } />
178+
onChange={e => toggleIsRawCounts(true)}/>
133179
&nbsp;Yes
134180
</label>
135181
<label className="sublabel">
136182
<input type="radio"
137183
name={`anndata-raw-counts-${file._id}`}
138184
value="false"
139185
checked={!isRawCountsFile}
140-
onChange={e => toggleIsRawCounts(false) }/>
186+
onChange={e => toggleIsRawCounts(false)}/>
141187
&nbsp;No
142188
</label>
143189
</div>
144-
{showRawCountsUnits && <div className="col-sm-8">
190+
{requireLocation && <div className="col-sm-4">
191+
<TextFormField label={rawSlotMessage()}
192+
fieldName="raw_location"
193+
file={file}
194+
updateFile={updateFile}
195+
placeholderText='Specify .raw or name of layer'/></div>
196+
}
197+
{ showRawCountsUnits && <div className="col-sm-4">
145198
<ExpressionFileInfoSelect label="Units *"
146199
propertyName="units"
147200
rawOptions={fileMenuOptions.units}

app/lib/differential_expression_service.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ def self.run_differential_expression_job(cluster_group, study, user, annotation_
161161
elsif raw_matrix.file_type == 'AnnData'
162162
de_params[:matrix_file_type] = 'h5ad'
163163
de_params[:file_size] = raw_matrix.upload_file_size
164+
de_params[:raw_location] = raw_matrix.ann_data_file_info.raw_location
164165
else
165166
de_params[:matrix_file_type] = 'dense'
166167
end

app/lib/file_parse_service.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,13 +112,15 @@ def self.run_parse_job(study_file, study, user, reparse: false, persist_on_fail:
112112
)
113113
elsif study_file.needs_raw_counts_extraction?
114114
params_object = AnnDataIngestParameters.new(
115-
anndata_file: study_file.gs_url, extract: %w[raw_counts], obsm_keys: nil,
115+
anndata_file: study_file.gs_url, extract: %w[raw_counts],
116+
raw_location: study_file.ann_data_file_info.raw_location, obsm_keys: nil,
116117
file_size: study_file.upload_file_size
117118
)
118119
else
119120
params_object = AnnDataIngestParameters.new(
120121
anndata_file: study_file.gs_url, obsm_keys: study_file.ann_data_file_info.obsm_key_names,
121-
file_size: study_file.upload_file_size, extract_raw_counts: study_file.is_raw_counts_file?
122+
file_size: study_file.upload_file_size, extract_raw_counts: study_file.is_raw_counts_file?,
123+
raw_location: study_file.ann_data_file_info.raw_location
122124
)
123125
end
124126
# TODO extract and parse Raw Exp Data (SCP-4710)

app/models/ann_data_file_info.rb

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class AnnDataFileInfo
1818
y_axis_max z_axis_min z_axis_max external_link_url external_link_title external_link_description
1919
parse_status spatial_cluster_associations
2020
],
21-
expression: %i[_id data_type taxon_id description expression_file_info y_axis_label]
21+
expression: %i[_id data_type taxon_id description expression_file_info y_axis_label raw_location]
2222
}.freeze
2323

2424
# required keys for data_fragments, by type
@@ -32,6 +32,8 @@ class AnnDataFileInfo
3232
field :has_expression, type: Boolean, default: false
3333
# controls whether or not to ingest data (true: should not ingest data, this is like an 'Other' file)
3434
field :reference_file, type: Boolean, default: true
35+
# location of raw count data, either .raw attribute or in layers[{name}]
36+
field :raw_location, type: String, default: ''
3537
# information from form about data contained inside AnnData file, such as names/descriptions
3638
# examples:
3739
# {
@@ -40,8 +42,8 @@ class AnnDataFileInfo
4042
# }
4143
# { _id: '6033f531e241391884633748', data_type: :expression, description: 'log(TPM) expression' }
4244
field :data_fragments, type: Array, default: []
43-
before_validation :set_default_cluster_fragments!, :sanitize_fragments!
44-
validate :validate_fragments
45+
before_validation :set_default_cluster_fragments!, :set_raw_location!, :sanitize_fragments!
46+
validate :validate_fragments, :enforce_raw_location
4547
after_validation :update_expression_file_info
4648

4749
# collect data frame key_names for clustering data inside AnnData file
@@ -82,6 +84,8 @@ def merge_form_data(form_data)
8284
fragments << extract_form_fragment(fragment_form, key, *allowed_params)
8385
when :expression
8486
merged_data[:taxon_id] = fragment_form[:taxon_id]
87+
anndata_info_attributes[:raw_location] = merged_data.dig(:expression_file_info_attributes, :raw_location)
88+
merged_data[:expression_file_info_attributes]&.delete(:raw_location) # prevent UnknownAttribute error
8589
merged_exp_fragment = fragment_form.merge(expression_file_info: merged_data[:expression_file_info_attributes])
8690
fragments << extract_form_fragment(merged_exp_fragment, key, *allowed_params)
8791
end
@@ -151,9 +155,18 @@ def update_expression_file_info
151155
return nil if reference_file || exp_fragment.nil? || exp_info.nil?
152156

153157
info_update = exp_fragment.with_indifferent_access[:expression_file_info]
158+
info_update.delete(:raw_location) if info_update[:raw_location]
154159
exp_info.assign_attributes(**info_update) if info_update
155160
end
156161

162+
# pull out raw_location from expression fragment and set as top-level attribute for ease of access
# does nothing for reference files or when there is no expression fragment
# a fragment without a raw_location stores '' (the declared default for the field) instead of nil
#
# @return [String, nil] the assigned value, or nil when nothing was assigned
def set_raw_location!
  # check the cheap guard first so reference files skip the fragment lookup entirely
  return nil if reference_file

  exp_fragment = find_fragment(data_type: :expression) || fragments_by_type(:expression).first
  return nil if exp_fragment.nil?

  self.raw_location = exp_fragment.with_indifferent_access[:raw_location].to_s
end
169+
157170
# extract description field from expression fragment to use as axis label
158171
def expression_axis_label
159172
exp_fragment = find_fragment(data_type: :expression) || fragments_by_type(:expression).first
@@ -218,7 +231,7 @@ def validate_fragments
218231
REQUIRED_FRAGMENT_KEYS.each do |data_type, keys|
219232
fragments = fragments_by_type(data_type)
220233
fragments.each do |fragment|
221-
unset_units_in_exp_fragment(fragment) if data_type == :expression
234+
unset_fields_in_exp_fragment(fragment) if data_type == :expression
222235
missing_keys = keys.map(&:to_s) - fragment.keys.map(&:to_s)
223236
missing_values = keys.select { |key| fragment[key].blank? }
224237
next if missing_keys.empty? && missing_values.empty?
@@ -239,16 +252,23 @@ def validate_fragments
239252
end
240253
end
241254

242-
# unset units in expression fragment since form data won't have value
255+
# validation: a non-reference file whose study_file is a raw counts matrix must have a raw_location
# adds an error on :raw_location when that value is blank
def enforce_raw_location
  needs_location = study_file.is_raw_counts_file? && !reference_file
  return unless needs_location && raw_location.blank?

  errors.add(:raw_location, 'must have a value for raw count matrices')
end
260+
261+
# clear raw-count-only fields from an expression fragment, since form data won't have values for them
# when expression_file_info is not flagged as raw counts, units is removed from it and the fragment's
# raw_location is reset to ''
# the stored fragment is updated by index in data_fragments in order to persist
def unset_fields_in_exp_fragment(fragment)
  exp_info = fragment[:expression_file_info]
  return nil unless exp_info
  return nil if exp_info[:is_raw_counts]

  exp_info.delete(:units)
  stored_fragment = data_fragments[fragment_index_of(fragment)]
  stored_fragment[:expression_file_info] = exp_info
  stored_fragment[:raw_location] = ''
end
254274
end

app/models/ann_data_ingest_parameters.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class AnnDataIngestParameters
3939
domain_ranges: nil,
4040
extract: %w[cluster metadata processed_expression],
4141
extract_raw_counts: false,
42+
raw_location: nil,
4243
cell_metadata_file: nil,
4344
ingest_cell_metadata: false,
4445
study_accession: nil,
@@ -60,6 +61,7 @@ class AnnDataIngestParameters
6061
format: { with: Parameterizable::GS_URL_REGEXP, message: 'is not a valid GS url' },
6162
allow_blank: true
6263
validates :machine_type, inclusion: Parameterizable::GCE_MACHINE_TYPES
64+
# raw_location is required whenever raw counts will be extracted
# extract entries are declared with %w[] (Strings), so compare as strings; a Symbol never matches them
validates :raw_location, presence: true,
                         if: proc { extract_raw_counts || extract.to_a.map(&:to_s).include?('raw_counts') }
6365

6466
def initialize(attributes = nil)
6567
super

app/models/differential_expression_parameters.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ class DifferentialExpressionParameters
2525
# matrix_file_path: raw counts matrix with source expression data
2626
# matrix_file_type: type of raw counts matrix (dense, sparse)
2727
# matrix_file_id: BSON ID of raw matrix StudyFile for associations
28+
# raw_location: slot in AnnData file where raw data resides (only needed for AnnData matrices)
2829
# gene_file (optional): genes/features file for sparse matrix
2930
# barcode_file (optional): barcodes file for sparse matrix
3031
# machine_type (optional): override for default ingest machine type (uses 'n2d-highmem-8')
@@ -43,6 +44,7 @@ class DifferentialExpressionParameters
4344
matrix_file_path: nil,
4445
matrix_file_type: nil,
4546
matrix_file_id: nil,
47+
raw_location: nil,
4648
gene_file: nil,
4749
barcode_file: nil,
4850
machine_type: 'n2d-highmem-8',
@@ -62,6 +64,7 @@ class DifferentialExpressionParameters
6264
validates :de_type, inclusion: %w[rest pairwise]
6365
validates :group1, :group2, presence: true, if: -> { de_type == 'pairwise' }
6466
validates :matrix_file_type, inclusion: %w[dense mtx h5ad]
67+
validates :raw_location, presence: true, if: -> { matrix_file_type == 'h5ad' }
6568
validates :machine_type, inclusion: Parameterizable::GCE_MACHINE_TYPES
6669
validates :gene_file, :barcode_file,
6770
presence: true,

config/application.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class Application < Rails::Application
2929
config.middleware.use Rack::Brotli
3030

3131
# Docker image for file parsing via scp-ingest-pipeline
32-
config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.40.1'
32+
config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.41.1'
3333

3434
# Docker image for image pipeline jobs
3535
config.image_pipeline_docker_image = 'gcr.io/broad-singlecellportal-staging/image-pipeline:0.1.0_c2b090043'
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Backfills raw_location on parsed, non-reference AnnData files that contain raw counts.
# Files handled here predate the raw_location field, so the '.raw' slot is recorded for each of them.
class SetRawLocationOnAnnDataFiles < Mongoid::Migration
  # location recorded for files uploaded before raw_location could be specified
  LEGACY_RAW_LOCATION = '.raw'.freeze

  # set '.raw' on both the top-level attribute and the expression fragment
  # files that already have a raw_location, or have no expression fragment, are skipped
  def self.up
    raw_counts_ann_data_files.each do |file|
      adata = file.ann_data_file_info
      next if adata.raw_location.present?

      exp_fragment = adata.find_fragment(data_type: :expression)
      next if exp_fragment.blank?

      # look up the index before changing the fragment, then replace by index in order to persist
      idx = adata.fragment_index_of(exp_fragment)
      adata.raw_location = LEGACY_RAW_LOCATION
      exp_fragment[:raw_location] = LEGACY_RAW_LOCATION
      adata.data_fragments[idx] = exp_fragment
      file.save
    end
  end

  # remove raw_location from both the top-level attribute and the expression fragment
  # the fragment must be cleared too, as AnnDataFileInfo#set_raw_location! (a before_validation hook)
  # copies the fragment value back onto the attribute; validations are skipped on save because
  # enforce_raw_location rejects a blank raw_location on raw count matrices
  def self.down
    raw_counts_ann_data_files.each do |file|
      adata = file.ann_data_file_info
      adata.raw_location = nil
      exp_fragment = adata.find_fragment(data_type: :expression)
      if exp_fragment.present?
        idx = adata.fragment_index_of(exp_fragment)
        # the key may be a Symbol or a String; deleting a missing key is a no-op
        exp_fragment.delete(:raw_location)
        exp_fragment.delete('raw_location')
        adata.data_fragments[idx] = exp_fragment
      end
      file.save(validate: false)
    end
  end

  # parsed, non-reference AnnData files that are raw count matrices
  #
  # @return [Array<StudyFile>]
  def self.raw_counts_ann_data_files
    StudyFile.where(
      file_type: 'AnnData', 'ann_data_file_info.reference_file' => false, parse_status: 'parsed'
    ).select(&:is_raw_counts_file?)
  end
  private_class_method :raw_counts_ann_data_files
end

0 commit comments

Comments
 (0)