Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions chord_metadata_service/chord/ingest/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from structlog.stdlib import BoundLogger

from chord_metadata_service.chord.models import Dataset
from chord_metadata_service.experiments.descriptions import DEFAULT_GA4GH_TEMPLATE_ID
from chord_metadata_service.experiments import models as em
from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA, EXPERIMENT_RESULT_SCHEMA
from chord_metadata_service.phenopackets import models as pm
Expand Down Expand Up @@ -89,18 +90,25 @@ def ingest_experiment(
new_experiment_id = experiment_data.get("id", str(uuid.uuid4()))
study_type = experiment_data.get("study_type")
experiment_type = experiment_data["experiment_type"]
experiment_ontology = experiment_data.get("experiment_ontology", [])
experiment_ontology = experiment_data.get("experiment_ontology", {})
molecule = experiment_data.get("molecule")
molecule_ontology = experiment_data.get("molecule_ontology", [])
molecule_ontology = experiment_data.get("molecule_ontology", {})
library_strategy = experiment_data.get("library_strategy")
library_source = experiment_data.get("library_source")
library_selection = experiment_data.get("library_selection")
library_layout = experiment_data.get("library_layout")
library_id = experiment_data.get("library_id")
library_extract_id = experiment_data.get("library_extract_id")
insert_size = experiment_data.get("insert_size")
description = experiment_data.get("description")
protocol_url = experiment_data.get("protocol_url")
library_description = experiment_data.get("library_description")
extraction_protocol = experiment_data.get("extraction_protocol")
reference_registry_id = experiment_data.get("reference_registry_id")
qc_flags = experiment_data.get("qc_flags", [])
biosample_id = experiment_data.get("biosample")
experiment_results = experiment_data.get("experiment_results", [])
ga4gh_template_id = experiment_data.get("ga4gh_template_id", DEFAULT_GA4GH_TEMPLATE_ID)
instrument = experiment_data.get("instrument", {})
extra_properties = experiment_data.get("extra_properties", {})

Expand Down Expand Up @@ -132,10 +140,17 @@ def ingest_experiment(
library_source=library_source,
library_selection=library_selection,
library_layout=library_layout,
library_id=library_id,
library_extract_id=library_extract_id,
insert_size=insert_size,
description=description,
protocol_url=protocol_url,
library_description=library_description,
extraction_protocol=extraction_protocol,
reference_registry_id=reference_registry_id,
qc_flags=qc_flags,
biosample=biosample,
ga4gh_template_id=ga4gh_template_id,
instrument=instrument_db,
extra_properties=extra_properties,
dataset=Dataset.objects.get(identifier=dataset_id)
Expand Down
29 changes: 13 additions & 16 deletions chord_metadata_service/chord/tests/example_experiment.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,23 @@
"biosample": "sample1",
"study_type": "Epigenomics",
"experiment_type": "Other",
"experiment_ontology": [
{
"experiment_ontology": {
"id": "http://www.ebi.ac.uk/efo/EFO_0002692",
"label": "ChIP-seq"
}
],
},
"library_strategy": "ChIP-Seq",
"library_source": "Genomic",
"library_selection": "Random",
"library_layout": "Single",
"library_id": "lib_id_01",
"ga4gh_template_id": "v1.0",
"insert_size": 400,
"extraction_protocol": "NGS",
"molecule": "genomic DNA",
"molecule_ontology": [
{
"molecule_ontology": {
"id": "SO:0000991",
"label": "genomic DNA"
}
],
},
"experiment_results": [
{
"identifier": "sample1_01",
Expand Down Expand Up @@ -72,24 +71,22 @@
"biosample": "sample2",
"study_type": "Epigenomics",
"experiment_type": "Other",
"experiment_ontology": [
{
"experiment_ontology": {
"id": "http://www.ebi.ac.uk/efo/EFO_0002692",
"label": "ChIP-seq"
}
],
},
"library_strategy": "ChIP-Seq",
"library_source": "Genomic",
"library_selection": "Random",
"library_layout": "Single",
"library_id": "lib_id_02",
"ga4gh_template_id": "v1.0",
"extraction_protocol": "NGS",
"molecule": "genomic DNA",
"molecule_ontology": [
{
"molecule_ontology": {
"id": "SO:0000991",
"label": "genomic DNA"
}
],
},
"instrument": {
"identifier": "instrument:02",
"model": "Illumina",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,20 @@
"biosample": "sample1ohno!",
"study_type": "Epigenomics",
"experiment_type": "Other",
"experiment_ontology": [
{
"experiment_ontology": {
"id": "http://www.ebi.ac.uk/efo/EFO_0002692",
"label": "ChIP-seq"
}
],
},
"library_strategy": "ChIP-Seq",
"library_source": "Genomic",
"library_selection": "Random",
"library_layout": "Single",
"extraction_protocol": "NGS",
"molecule": "genomic DNA",
"molecule_ontology": [
{
"molecule_ontology": {
"id": "SO:0000991",
"label": "genomic DNA"
}
],
},
"experiment_results": [
{
"identifier": "sample1_01",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,20 @@
"biosample": "sample1",
"study_type": "Epigenomics",
"experiment_type": "Other",
"experiment_ontology": [
{
"experiment_ontology": {
"id": "http://www.ebi.ac.uk/efo/EFO_0002692",
"label": "ChIP-seq"
}
],
},
"library_strategy": "ChIP-Seq",
"library_source": "Genomic",
"library_selection": "Random",
"library_layout": "Single",
"extraction_protocol": "NGS",
"molecule": "genomic DNA",
"molecule_ontology": [
{
"molecule_ontology": {
"id": "SO:0000991",
"label": "genomic DNA"
}
],
},
"experiment_results": [
{
"identifier": "sample1_01",
Expand Down
13 changes: 11 additions & 2 deletions chord_metadata_service/experiments/descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

from chord_metadata_service.restapi.description_utils import EXTRA_PROPERTIES, ontology_class

DEFAULT_GA4GH_TEMPLATE_ID = "v1.0"

EXPERIMENT = {
"description": "Experiment related metadata.",
"properties": {
Expand All @@ -15,14 +17,14 @@
"Histone H3K4me1, etc.).",
"experiment_ontology": {
"description": "Links to experiment ontology information (e.g. via the OBI ontology.).",
"items": ontology_class("describing the experiment"),
**ontology_class("describing the experiment"),
},
"molecule": "(Controlled Vocabulary) The type of molecule that was extracted from the biological material."
"Include one of the following: total RNA, polyA RNA, cytoplasmic RNA, nuclear RNA, small RNA, "
"genomic DNA, protein, or other.",
"molecule_ontology": {
"description": "Links to molecule ontology information (e.g. via the SO ontology.).",
"items": ontology_class("describing a molecular property"),
**ontology_class("describing a molecular property"),
},
"library_strategy": "(Controlled Vocabulary) The assay used. These are defined within the SRA metadata "
"specifications with a controlled vocabulary (e.g. Bisulfite-Seq, RNA-Seq, ChIP-Seq)."
Expand All @@ -33,6 +35,12 @@
"library_selection": "Method used to enrich the target in the sequence library preparation. "
"E.g. Random, PCR, Random PCR, RT-PCR, MF and other.",
"library_layout": "The library layout. E.g. Single, Paired.",
"library_id": "The identifier associated with the sequencing library.",
"library_extract_id": "Optional identifier for a given specific extraction.",
"insert_size": "The size of the DNA fragment inserted into the vector (Integer).",
"description": "Free-text experimental design description.",
"protocol_url": "URL to the sequencing protocol.",
"library_description": "Specific free-text details about library construction.",
"extraction_protocol": "The protocol used to isolate the extract material.",
"reference_registry_id": "The IHEC EpiRR ID for this dataset, only for IHEC Reference Epigenome datasets. "
"Otherwise leave empty.",
Expand All @@ -42,6 +50,7 @@
},
"experiment_results": "Related files containing the analysis of sequencing data.",
"instrument": "The instrument used to sequence the biological specimens.",
"ga4gh_template_id": "An ID referring to the GA4GH template used for this experiment (e.g. v1.0).",
"biosample": "Biosample on which this experiment was done.",
**EXTRA_PROPERTIES
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Generated by Django 5.1.15 on 2026-01-14 15:46

from django.db import migrations, models
import chord_metadata_service.restapi.validators

# --- DATA MIGRATION FUNCTIONS ---

def convert_list_to_object(apps, schema_editor):
    """
    Forward data migration: collapse list-valued ontology fields to one object.

    For every experiment, if ``molecule_ontology`` or ``experiment_ontology``
    currently holds a list, it is replaced by the list's first item, or by an
    empty dict when the list is empty. Any additional items in the list are
    discarded. Values that are not lists are left untouched.

    Only rows that were actually modified are written back, and only the
    modified columns are included in the UPDATE, so re-running the migration
    on already-converted data performs no writes.

    :param apps: Historical app registry supplied by ``migrations.RunPython``.
    :param schema_editor: Schema editor supplied by ``migrations.RunPython``
        (unused).
    """
    # retrieve the historical version of the model
    Experiment = apps.get_model('experiments', 'Experiment')

    ontology_fields = ('molecule_ontology', 'experiment_ontology')

    for exp in Experiment.objects.all():
        changed_fields = []

        for field_name in ontology_fields:
            current = getattr(exp, field_name)
            if isinstance(current, list):
                # Take the first element if it exists, otherwise an empty dict
                setattr(exp, field_name, current[0] if current else {})
                changed_fields.append(field_name)

        # Skip the write entirely when the row already has the new shape.
        if changed_fields:
            exp.save(update_fields=changed_fields)

def reverse_object_to_list(apps, schema_editor):
    """
    Reverse data migration: wrap single ontology objects back into lists.

    For every experiment, if ``molecule_ontology`` or ``experiment_ontology``
    currently holds a dict, it is replaced by a one-element list containing
    that dict, or by an empty list when the dict is empty. Values that are not
    dicts are left untouched.

    Only rows that were actually modified are written back, and only the
    modified columns are included in the UPDATE.

    :param apps: Historical app registry supplied by ``migrations.RunPython``.
    :param schema_editor: Schema editor supplied by ``migrations.RunPython``
        (unused).
    """
    Experiment = apps.get_model('experiments', 'Experiment')

    ontology_fields = ('molecule_ontology', 'experiment_ontology')

    for exp in Experiment.objects.all():
        changed_fields = []

        for field_name in ontology_fields:
            current = getattr(exp, field_name)
            if isinstance(current, dict):
                # An empty dict means "no term", which the legacy schema
                # represented as an empty list.
                setattr(exp, field_name, [current] if current else [])
                changed_fields.append(field_name)

        # Skip the write entirely when the row already has the legacy shape.
        if changed_fields:
            exp.save(update_fields=changed_fields)

# --- MIGRATION CLASS ---

class Migration(migrations.Migration):
    """
    Extend ``Experiment`` with additional optional fields and switch the two
    ontology JSON fields from a list of terms to a single term object.

    Operation order matters: the new columns are added first, then existing
    list-valued ontology data is rewritten by the data migration, and only
    afterwards are the ontology fields altered to default to ``dict`` and be
    validated against the ``ONTOLOGY_CLASS`` schema.
    """

    dependencies = [
        ('experiments', '0013_experiment_fts_extra_experimentresult_fts_extra'),
    ]

    operations = [
        # 1) New nullable / optional columns.
        migrations.AddField(
            model_name='experiment',
            name='description',
            field=models.TextField(
                blank=True,
                help_text='Free-text experimental design description.',
                null=True,
            ),
        ),
        migrations.AddField(
            model_name='experiment',
            name='ga4gh_template_id',
            field=models.CharField(
                blank=True,
                default='v1.0',
                help_text='An ID referring to the GA4GH template used for this experiment (e.g. v1.0).',
                max_length=200,
                null=True,
            ),
        ),
        migrations.AddField(
            model_name='experiment',
            name='insert_size',
            field=models.IntegerField(
                blank=True,
                help_text='The size of the DNA fragment inserted into the vector (Integer).',
                null=True,
            ),
        ),
        migrations.AddField(
            model_name='experiment',
            name='library_description',
            field=models.TextField(
                blank=True,
                help_text='Specific free-text details about library construction.',
                null=True,
            ),
        ),
        migrations.AddField(
            model_name='experiment',
            name='library_extract_id',
            field=models.CharField(
                blank=True,
                help_text='Optional identifier for a given specific extraction.',
                max_length=200,
                null=True,
            ),
        ),
        migrations.AddField(
            model_name='experiment',
            name='library_id',
            field=models.CharField(
                blank=True,
                help_text='The identifier associated with the sequencing library.',
                max_length=200,
                null=True,
            ),
        ),
        migrations.AddField(
            model_name='experiment',
            name='protocol_url',
            field=models.URLField(
                blank=True,
                help_text='URL to the sequencing protocol.',
                max_length=500,
                null=True,
            ),
        ),

        # 2) Rewrite stored ontology values from lists to single objects
        #    (reversible: objects are wrapped back into lists).
        # NOTE(review): Django's documentation cautions against combining
        # schema changes and RunPython in one migration on PostgreSQL
        # ("pending trigger events" errors) - confirm this runs cleanly on
        # the deployment database, or split into separate migrations.
        migrations.RunPython(convert_list_to_object, reverse_object_to_list),

        # 3) Ontology fields now hold a single ONTOLOGY_CLASS object.
        migrations.AlterField(
            model_name='experiment',
            name='experiment_ontology',
            field=models.JSONField(
                blank=True,
                default=dict,
                help_text='An ontology term describing the experiment.',
                validators=[
                    chord_metadata_service.restapi.validators.JsonSchemaValidator(
                        formats=None,
                        schema_ref='ONTOLOGY_CLASS',
                    ),
                ],
            ),
        ),
        migrations.AlterField(
            model_name='experiment',
            name='molecule_ontology',
            field=models.JSONField(
                blank=True,
                default=dict,
                help_text='An ontology term describing a molecular property.',
                validators=[
                    chord_metadata_service.restapi.validators.JsonSchemaValidator(
                        formats=None,
                        schema_ref='ONTOLOGY_CLASS',
                    ),
                ],
            ),
        ),
    ]
Loading