From 76e6c94a16e6e387e7b8b49a0ba9eb8a688b6906 Mon Sep 17 00:00:00 2001 From: anthony galassi Date: Tue, 28 Jan 2025 11:00:23 -0500 Subject: [PATCH 01/10] slowly moving loading metadata to imports to make packaging easier --- Makefile | 1 - pypet2bids/pypet2bids/helper_functions.py | 14 +- pypet2bids/pypet2bids/metadata.py | 10710 ++++++++++++++++ .../pypet2bids/multiple_spreadsheets.py | 6 +- pypet2bids/pypet2bids/single_spreadsheet.py | 4 +- scripts/schema_json_to_py | 27 + 6 files changed, 10752 insertions(+), 10 deletions(-) create mode 100644 pypet2bids/pypet2bids/metadata.py create mode 100755 scripts/schema_json_to_py diff --git a/Makefile b/Makefile index 67a1470d..b3993c7f 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,6 @@ add: # copies metadata to path included in pypet2bids project to enable packaging of those files w/ poetry buildpackage: - @cp -R metadata/ pypet2bids/pypet2bids/metadata @cp pypet2bids/pyproject.toml pypet2bids/pypet2bids/pyproject.toml @rm -rf pypet2bids/dist @cd pypet2bids && poetry lock && poetry build diff --git a/pypet2bids/pypet2bids/helper_functions.py b/pypet2bids/pypet2bids/helper_functions.py index f8f28f8f..8c48fe20 100644 --- a/pypet2bids/pypet2bids/helper_functions.py +++ b/pypet2bids/pypet2bids/helper_functions.py @@ -40,6 +40,11 @@ from typing import Union from platform import system +try: + import metadata +except ImportError: + from pypet2bids import metadata + parent_dir = pathlib.Path(__file__).parent.resolve() project_dir = parent_dir.parent.parent if "PET2BIDS" not in project_dir.parts: @@ -55,14 +60,13 @@ ) # load bids schema -bids_schema_path = os.path.join(metadata_dir, "schema.json") -schema = json.load(open(bids_schema_path, "r")) +schema = metadata.schema # putting these paths here as they are reused in dcm2niix4pet.py, update_json_pet_file.py, and ecat.py module_folder = Path(__file__).parent.resolve() python_folder = module_folder.parent pet2bids_folder = python_folder.parent -metadata_folder = os.path.join(pet2bids_folder, "metadata") +#metadata_folder = os.path.join(pet2bids_folder, "metadata") loggers = {} @@ -820,9 +824,7 @@ def get_recon_method(ReconstructionMethodString: str) -> dict: dimension = re.search(search_criteria, ReconMethodName)[0] # doing some more manipulation of the recon method name to expand it from not so helpful acronyms - possible_names = load_pet_bids_requirements_json(pet_reconstruction_metadata_json)[ - "reconstruction_names" - ] + possible_names = metadata.PET_reconstruction_methods.get(ReconMethodName, []) # we want to sort the possible names by longest first that we don't break up an acronym prematurely sorted_df = pandas.DataFrame(possible_names).sort_values( diff --git a/pypet2bids/pypet2bids/metadata.py b/pypet2bids/pypet2bids/metadata.py new file mode 100644 index 00000000..025adfe3 --- /dev/null +++ b/pypet2bids/pypet2bids/metadata.py @@ -0,0 +1,10710 @@ +blood_metadata = { + "mandatory": [ + "PlasmaAvail", + "MetaboliteAvail", + ["MetaboliteMethod", "MetaboliteRecoveryCorrectionApplied"], + "WholeBloodAvail", + "DispersionCorrected", + ], + "recommended": [ + ["PlasmaFreeFraction", "PlasmaFreeFractionMethod"], + "WithdrawalRate", + "TubingType", + "TubingLength", + "DispersionConstant", + "Haematocrit", + "BloodDensity", + ], +} +dicom2bids = { + "dcmfields": [ + "Manufacturer", + "ManufacturerModelName", + "Units", + "InstitutionName", + "InstitutionAddress", + "InstitutionalDepartmentName", + "BodyPartExamined", + "MappingResource", + "MappingResourceName", + "CodeMeaning", + "RadionuclideTotalDose", + "RadiopharmaceuticalSpecificActivity", + "RadiopharmaceuticalVolume", + "InterventionDrugName", + "InterventionDrugDose", + "RadiopharmaceuticalStartTime", + "ActualFrameDuration", + "AcquisitionDate", + "RadiopharmaceuticalStopTime", + "ReconstructionMethod", + "ReconstructionMethod", + "ReconstructionMethod", + "ReconstructionMethod", + "ConvolutionKernel", + "ConvolutionKernel", + "AttenuationCorrectionMethod", + "ScatterFractionFactor", + "DoseCalibrationFactor", + "DecayFactor", + ], + "jsonfields": [ + "Manufacturer", + "ManufacturersModelName", + "Units", + "InstitutionName", + "InstitutionAddress", + "InstitutionalDepartmentName", + "BodyPart", + "TracerName", + "TracerName", + "TracerRadionuclide", + "InjectedRadioactivity", + "MolarActivity", + "InjectedVolume", + "PharmaceuticalName", + "PharmaceuticalDoseAmount", + "InjectionStart", + "FrameDuration", + "ScanDate", + "InjectionEnd", + "ReconMethodName", + "ReconMethodParameterLabels", + "ReconMethodParameterUnits", + "ReconMethodParameterValues", + "ReconFilterType", + "ReconFilterSize", + "AttenuationCorrection", + "ScatterFraction", + "DoseCalibrationFactor", + "DecayCorrectionFactor", + ], + "RadionuclideCodes": { + "C-105A": "^11^Carbon", + "C-107A1": "^13^Nitrogen", + "C-1018C": "^14^Oxygen", + "C-B1038": "^15^Oxygen", + "C-111A1": "^18^Fluorine", + "C-155A1": "^22^Sodium", + "C-135A4": "^38^Potassium", + "126605": "^43^Scandium", + "126600": "^44^Scandium", + "C-166A2": "^45^Titanium", + "126601": "^51^Manganese", + "C-130A1": "^52^Iron", + "C-149A1": "^52^Manganese", + "126607": "^52m^Manganese", + "C-127A4": "^60^Copper", + "C-127A1": "^61^Copper", + "C-127A5": "^62^Copper", + "C-141A1": "^62^Zinc", + "C-127A": "^64^Copper ", + "C-131A1": "^66^Gallium", + "C-131A3": "^68^Gallium", + "C-128A2": "^68^Germanium", + "126602": "^70^Arsenic", + "C-115A2": "^72^Arsenic", + "C-116A2": "^73^Selenium", + "C-113A1": "^75^Bromine", + "C-113A2": "^76^Bromine", + "C-113A3": "^77^Bromine", + "C-159A2": "^82^Rubidium", + "C-162A3": "^86^Yttrium", + "C-168A4": "^89^Zirconium", + "126603": "^90^Niobium", + "C-162A7": "^90^Yttrium", + "C-163AA": "^94m^Technetium", + "C-114A5": "^124^Iodine", + "126606": "^152^Terbium", + }, +} +PET_reconstruction_filters = { + "dicom_values": [ + { + "value": "XYZGAUSSIAN3.00", + "ReconFilterSize": 3, + "ReconFilterType": "GAUSSIAN", + } + ] +} +definitions = { + "Radioactivity": { + "definition": "the property of certain nuclei to spontaneously fragment or rearrange, resulting in the emission of radiation" + }, + "Activity": { + "definition": "the number of nuclear decays, occurring in a given quantity of material over a certain time interval, divided by that time interval", + "unit": "Bq", + }, + "Becquerel": { + "definition": "the agreed SI derived unit for the quantity of activity, equal to one disintegration per second", + "symbol": "Bq", + }, + "Curie": { + "definition": "the pre-SI unit for the quantity of activity such 1 Ci = 3.7 10^10 Bq or 1 Bq = 2.7 10^− 11 Ci", + "symbol": "Ci", + }, + "Injected Radioactivity": { + "definition": "Total amount of activity injected into the patient,DICOM Tag (0018,1074) Radionuclide Total Dose", + "unit": "Bq", + }, + "Specific": { + "definition": "a physical property as a function of the mass of the material in question" + }, + "Specific activity": { + "definition": "the measured activity per gram of compound", + "unit": ["Bq/g", "GBq/mg"], + "symbol": "As", + }, + "Molar activity": { + "definition": "the measured activity per mole of compound", + "unit": ["Bq/mol", "GBq/μmol"], + "symbol": "Am", + }, +} +PET_reconstruction_methods = { + "reconstruction_method": [ + { + "contents": "PSF+TOF3i21s", + "subsets": 21, + "iterations": 3, + "ReconMethodName": "Point-Spread Function + Time Of Flight", + "ReconMethodParameterUnits": [None, None], + "ReconMethodParameterLabels": ["subsets", "iterations"], + "ReconMethodParameterValues": [21, 3], + }, + { + "contents": "OP-OSEM3i21s", + "subsets": 21, + "iterations": 3, + "ReconMethodName": "Ordinary Poisson - Ordered Subset Expectation Maximization", + "ReconMethodParameterUnits": [None, None], + "ReconMethodParameterLabels": ["subsets", "iterations"], + "ReconMethodParameterValues": [21, 3], + }, + { + "contents": "OSEM3D-OP-PSFi10s16", + "subsets": 16, + "iterations": 10, + "ReconMethodName": "Ordinary Poisson 3D Ordered Subset Expectation Maximization + Point-Spread Function", + "ReconMethodParameterUnits": [None, None], + "ReconMethodParameterLabels": ["subsets", "iterations"], + "ReconMethodParameterValues": [16, 10], + }, + { + "contents": "OP_OSEM3D", + "ReconMethodName": "Ordinary Poisson 3D Ordered Subset Expectation Maximization", + "ReconMethodParameterUnits": [None, None], + "ReconMethodParameterLabels": ["subsets", "iterations"], + "ReconMethodParameterValues": [None, None], + }, + { + "contents": "LOR-RAMLA", + "subsets": None, + "iterations": None, + "ReconMethodName": "Line Of Response - Row Action Maximum Likelihood", + "ReconMethodParameterUnits": ["none", "none"], + "ReconMethodParameterLabels": ["subsets", "iterations"], + "ReconMethodParameterValues": [None, None], + }, + { + "contents": "3D-RAMLA", + "subsets": None, + "iterations": None, + "ReconMethodName": "3D Row Action Maximum Likelihood", + "ReconMethodParameterUnits": [None, None], + "ReconMethodParameterLabels": ["subsets", "iterations"], + "ReconMethodParameterValues": [None, None], + }, + { + "contents": "3DKinahan-Rogers", + "subsets": None, + "iterations": None, + "ReconMethodName": "3D Reprojection", + "ReconMethodParameterLabels": [], + "ReconMethodParameterValues": [], + "ReconMethodParameterUnits": [], + }, + ], + "reconstruction_names": [ + {"value": "OS", "name": "Ordered Subset"}, + {"value": "OSEM", "name": "Ordered Subset Expectation Maximization"}, + {"value": "LOR", "name": "Line Of Response"}, + {"value": "RAMLA", "name": "Row Action Maximum Likelihood"}, + {"value": "OP", "name": "Ordinary Poisson"}, + {"value": "PSF", "name": "Point-Spread Function modelling"}, + {"value": "TOF", "name": "Time Of Flight"}, + {"value": "TF", "name": "Time Of Flight"}, + {"value": "VPHD", "name": "VUE Point HD"}, + { + "value": "VPHD-S", + "name": "3D Ordered Subset Expectation Maximization with Point-Spread Function modelling", + }, + {"value": "VPFX", "name": "VUE Point HD using Time Of Flight"}, + { + "value": "VPFXS", + "name": "VUE Point HD using Time Of Flight with Point-Spread Function modelling", + }, + {"value": "Q.Clear", "name": "VUE Point HD with regularization (smoothing)"}, + {"value": "BLOB", "name": "3D spherically symmetric basis function"}, + {"value": "FilteredBackProjection", "name": "Filtered Back Projection"}, + {"value": "Kinahan-Rogers", "name": "Reprojection"}, + ], +} +schema = { + "schema_version": "0.6.0", + "bids_version": "1.8.0", + "meta": { + "context": { + "context": { + "type": "object", + "properties": { + "schema": { + "description": "The BIDS specification schema", + "type": "object", + }, + "dataset": { + "description": "Properties and contents of the entire dataset", + "type": "object", + "properties": { + "dataset_description": { + "description": "Contents of /dataset_description.json", + "type": "object", + }, + "files": { + "description": "List of all files in dataset", + "type": "array", + }, + "tree": { + "description": "Tree view of all files in dataset", + "type": "object", + }, + "ignored": { + "description": "Set of ignored files", + "type": "array", + }, + "datatypes": { + "description": "Data types present in the dataset", + "type": "array", + }, + "modalities": { + "description": "Modalities present in the dataset", + "type": "array", + }, + "subjects": { + "description": "Collections of subjects in dataset", + "type": "object", + "properties": { + "sub_dirs": { + "description": "Subjects as determined by sub-*/ directories", + "type": "array", + "items": {"type": "string"}, + }, + "participant_id": { + "description": "The participant_id column of participants.tsv", + "type": "array", + "items": {"type": "string"}, + }, + "phenotype": { + "description": "The union of participant_id columns in phenotype files", + "type": "array", + "items": {"type": "string"}, + }, + }, + }, + }, + }, + "subject": { + "description": "Properties and contents of the current subject", + "type": "object", + "properties": { + "sessions": { + "description": "Collections of sessions in subject", + "type": "object", + "properties": { + "ses_dirs": { + "description": "Sessions as determined by ses-*/ directories", + "type": "array", + "items": {"type": "string"}, + }, + "session_id": { + "description": "The session_id column of sessions.tsv", + "type": "array", + "items": {"type": "string"}, + }, + "phenotype": { + "description": "The union of session_id columns in phenotype files", + "type": "array", + "items": {"type": "string"}, + }, + }, + } + }, + }, + "path": { + "description": "Path of the current file", + "type": "string", + }, + "entities": { + "description": "Entities parsed from the current filename", + "type": "object", + }, + "datatype": { + "description": "Datatype of current file, for examples, anat", + "type": "string", + }, + "suffix": { + "description": "Suffix of current file", + "type": "string", + }, + "extension": { + "description": "Extension of current file including initial dot", + "type": "string", + }, + "modality": { + "description": "Modality of current file, for examples, MRI", + "type": "string", + }, + "sidecar": { + "description": "Sidecar metadata constructed via the inheritance principle", + "type": "object", + }, + "associations": { + "description": "Associated files, indexed by suffix, selected according to the inheritance principle\n", + "type": "object", + "properties": { + "events": { + "description": "Events file", + "type": "object", + "properties": { + "path": { + "description": "Path to associated events file", + "type": "string", + }, + "onset": { + "description": "Contents of the onset column", + "type": "array", + "items": {"type": "string"}, + }, + }, + }, + "aslcontext": { + "description": "ASL context file", + "type": "object", + "properties": { + "path": { + "description": "Path to associated aslcontext file", + "type": "string", + }, + "n_rows": { + "description": "Number of rows in aslcontext.tsv", + "type": "integer", + }, + "volume_type": { + "description": "Contents of the volume_type column", + "type": "array", + "items": {"type": "string"}, + }, + }, + }, + "m0scan": { + "description": "M0 scan file", + "type": "object", + "properties": { + "path": { + "description": "Path to associated M0 scan file", + "type": "string", + } + }, + }, + "magnitude": { + "description": "Magnitude image file", + "type": "object", + "properties": { + "path": { + "description": "Path to associated magnitude file", + "type": "string", + } + }, + }, + "magnitude1": { + "description": "Magnitude1 image file", + "type": "object", + "properties": { + "path": { + "description": "Path to associated magnitude1 file", + "type": "string", + } + }, + }, + "bval": { + "description": "B value file", + "type": "object", + "properties": { + "path": { + "description": "Path to associated bval file", + "type": "string", + }, + "n_cols": { + "description": "Number of columns in bval file", + "type": "integer", + }, + }, + }, + "bvec": { + "description": "B vector file", + "type": "object", + "properties": { + "path": { + "description": "Path to associated bvec file", + "type": "string", + }, + "n_cols": { + "description": "Number of columns in bvec file", + "type": "integer", + }, + }, + }, + "channels": { + "description": "Channels file", + "type": "object", + "properties": { + "path": { + "description": "Path to associated channels file", + "type": "string", + }, + "type": { + "description": "Contents of the type column", + "type": "array", + "items": {"type": "string"}, + }, + }, + }, + "coordsystem": { + "description": "Coordinate system file", + "type": "object", + "properties": { + "path": { + "description": "Path to associated coordsystem file", + "type": "string", + } + }, + }, + }, + }, + "columns": { + "description": "TSV columns, indexed by column header, values are arrays with column contents", + "type": "object", + "additionalProperties": {"type": "array"}, + }, + "json": { + "description": "Contents of the current JSON file", + "type": "object", + }, + "nifti_header": { + "name": "NIfTI Header", + "description": "Parsed contents of NIfTI header referenced elsewhere in schema.", + "type": "object", + "properties": { + "dim_info": { + "name": "Dimension Information", + "description": "Metadata about dimensions data.", + "type": "object", + "properties": { + "freq": { + "name": "Frequency", + "description": "These fields encode which spatial dimension (1, 2, or 3).", + "type": "integer", + }, + "phase": { + "name": "Phase", + "description": "Corresponds to which acquisition dimension for MRI data.", + "type": "integer", + }, + "slice": { + "name": "Slice", + "description": "Slice dimensions.", + "type": "integer", + }, + }, + }, + "dim": { + "name": "Data Dimensions", + "description": "Data seq dimensions.", + "type": "array", + "minItems": 8, + "maxItems": 8, + "items": {"type": "integer"}, + }, + "pixdim": { + "name": "Pixel Dimension", + "description": "Grid spacings (unit per dimension).", + "type": "array", + "minItems": 8, + "maxItems": 8, + "items": {"type": "number"}, + }, + "xyzt_units": { + "name": "XYZT Units", + "description": "Units of pixdim[1..4]", + "type": "object", + "properties": { + "xyz": { + "name": "XYZ Units", + "description": "String representing the unit of voxel spacing.", + "type": "string", + "enum": ["unknown", "meter", "mm", "um"], + }, + "t": { + "name": "Time Unit", + "description": "String representing the unit of inter-volume intervals.", + "type": "string", + "enum": ["unknown", "sec", "msec", "usec"], + }, + }, + }, + "qform_code": { + "name": "qform code", + "description": "Use of the quaternion fields.", + "type": "integer", + }, + "sform_code": { + "name": "sform code", + "description": "Use of the affine fields.", + "type": "integer", + }, + }, + }, + }, + } + }, + "expression_tests": [ + {"expression": "sidecar.MissingValue", "result": None}, + {"expression": "null.anything", "result": None}, + {"expression": "(null)", "result": None}, + {"expression": "null[0]", "result": None}, + {"expression": "null && true", "result": None}, + {"expression": "null || true", "result": None}, + {"expression": "!null", "result": None}, + {"expression": "intersects([], null)", "result": None}, + {"expression": "intersects(null, [])", "result": None}, + {"expression": "match(null, 'pattern')", "result": None}, + {"expression": "match('string', null)", "result": None}, + {"expression": "min(null)", "result": None}, + {"expression": "max(null)", "result": None}, + {"expression": "length(null)", "result": None}, + {"expression": "type(null)", "result": "null"}, + {"expression": "null == false", "result": False}, + {"expression": "null == true", "result": False}, + {"expression": "null != false", "result": True}, + {"expression": "null != true", "result": True}, + {"expression": "null != 1.5", "result": True}, + {"expression": "null == null", "result": True}, + {"expression": "null == 1", "result": False}, + {"expression": '"VolumeTiming" in null', "result": False}, + {"expression": "evaluate(true)", "result": True}, + {"expression": "evaluate(false)", "result": False}, + {"expression": "evaluate(null)", "result": False}, + ], + }, + "objects": { + "columns": { + "HED": { + "name": "HED", + "display_name": "HED Tag", + "description": "Hierarchical Event Descriptor (HED) Tag.\nSee the [HED Appendix](SPEC_ROOT/appendices/hed.md) for details.\n", + "type": "string", + }, + "abbreviation": { + "name": "abbreviation", + "display_name": "Abbreviation", + "description": "The unique label abbreviation\n", + "type": "string", + }, + "acq_time__scans": { + "name": "acq_time", + "display_name": "Scan acquisition time", + "description": "Acquisition time refers to when the first data point in each run was acquired.\nFurthermore, if this header is provided, the acquisition times of all files\nfrom the same recording MUST be identical.\nDatetime format and their anonymization are described in\n[Units](SPEC_ROOT/02-common-principles.md#units).\n", + "type": "string", + "format": "datetime", + }, + "acq_time__sessions": { + "name": "acq_time", + "display_name": "Session acquisition time", + "description": "Acquisition time refers to when the first data point of the first run was acquired.\nDatetime format and their anonymization are described in\n[Units](SPEC_ROOT/02-common-principles.md#units).\n", + "type": "string", + "format": "datetime", + }, + "age": { + "name": "age", + "display_name": "Subject age", + "description": "Numeric value in years (float or integer value).\n", + "type": "number", + "unit": "year", + }, + "cardiac": { + "name": "cardiac", + "display_name": "Cardiac measurement", + "description": "continuous pulse measurement\n", + "type": "number", + }, + "color": { + "name": "color", + "display_name": "Color label", + "description": "Hexadecimal. Label color for visualization.\n", + "type": "string", + "unit": "hexadecimal", + }, + "detector__channels": { + "name": "detector", + "display_name": "Detector Name", + "description": "Name of the detector as specified in the `*_optodes.tsv` file.\n`n/a` for channels that do not contain NIRS signals (for example, acceleration).\n", + "anyOf": [{"type": "string"}, {"type": "string", "enum": ["n/a"]}], + }, + "detector_type": { + "name": "detector_type", + "display_name": "Detector Type", + "description": "The type of detector. Only to be used if the field `DetectorType` in `*_nirs.json` is set to `mixed`.\n", + "anyOf": [{"type": "string"}], + }, + "derived_from": { + "name": "derived_from", + "display_name": "Derived from", + "description": "`sample-