BU-ISCIII · Aberdur · Feb 23, 2026 · Feb 18, 2026 · Feb 18, 2026 · Feb 18, 2026
diff --git a/relecov_tools/build_schema.py b/relecov_tools/build_schema.py
@@ -222,6 +222,11 @@ def _load_laboratory_addresses(self):
         Returns two dictionaries with key in the three special fields:
         - dropdowns[field] ........ list ‘<name> [<city>] [<ccn>]’
         - uniques[field] .......... unique names for schema enum
+
+        NOTE:
+        For RELECOV, laboratory_address.json stores institution names under
+        `collecting_institution`. We intentionally reuse that same source for
+        collecting/submitting/sequencing to keep the three schema enums aligned.
         """
         json_path = os.path.join(
             os.path.dirname(__file__),
@@ -241,12 +246,13 @@ def _load_laboratory_addresses(self):
 
         for ccn, info in lab_data.items():
             city = info.get("geo_loc_city", "").strip()
-
+            name = info.get("collecting_institution", "").strip()
+            if not name:
+                continue
+            dropdown_entry = f"{name} [{city}] [{ccn}]"
             for f in fields:
-                name = info.get(f, "").strip()
-                if name:
-                    dropdowns[f].append(f"{name} [{city}] [{ccn}]")
-                    uniques[f].add(name)
+                dropdowns[f].append(dropdown_entry)
+                uniques[f].add(name)
 
         dropdowns = {k: sorted(v) for k, v in dropdowns.items()}
         uniques = {k: sorted(v) for k, v in uniques.items()}
@@ -455,8 +461,74 @@ def create_schema_draft_template(self):
         )
         return draft_template
 
+    def _cast_example_to_type(
+        self, property_id: str, expected_type: str | None, value: any
+    ) -> any:
+        """Cast a single example value to the declared JSON-schema type when possible.
+
+        Args:
+            property_id: Property name, used only in warning messages.
+            expected_type: Declared type. "string", "integer", "number" and
+                "boolean" are recognised (case-insensitive, surrounding
+                whitespace ignored). Any other value, including None, leaves
+                `value` untouched.
+            value: Example value to convert.
+
+        Returns:
+            The converted value, or the original `value` (after logging a
+            warning) when it cannot be represented as the declared type.
+        """
+        if not isinstance(expected_type, str):
+            return value
+        expected = expected_type.strip().lower()
+
+        def keep_original() -> any:
+            # Single place for the mismatch warning shared by every branch.
+            self.log.warning(
+                "Example value %r for property '%s' does not match expected type '%s'. Keeping original value.",
+                value,
+                property_id,
+                expected,
+            )
+            return value
+
+        if expected == "string":
+            return str(value)
+        if expected == "integer":
+            # Exact integers must not round-trip through float: values above
+            # 2**53 would silently lose precision.
+            if isinstance(value, int) and not isinstance(value, bool):
+                return value
+            if isinstance(value, str):
+                try:
+                    return int(value.strip())
+                except ValueError:
+                    # Not a plain integer literal (e.g. "5.0"); try float below.
+                    pass
+            try:
+                parsed_number = float(value)
+            except (TypeError, ValueError, OverflowError):
+                return keep_original()
+            if not parsed_number.is_integer():
+                return keep_original()
+            return int(parsed_number)
+        if expected == "number":
+            try:
+                return float(value)
+            except (TypeError, ValueError, OverflowError):
+                return keep_original()
+        if expected == "boolean":
+            if isinstance(value, bool):
+                return value
+            if isinstance(value, str):
+                normalized = value.strip().lower()
+                if normalized in ("true", "1", "yes", "y"):
+                    return True
+                if normalized in ("false", "0", "no", "n"):
+                    return False
+            return keep_original()
+        # Unrecognised declared type: leave the example as it is.
+        return value
+
+    def _cast_examples_to_declared_type(
+        self, property_id: str, expected_type: str | None, values: list[any]
+    ) -> list[any]:
+        """Run `_cast_example_to_type` over every entry of `values`, keeping order.
+
+        Returns a new list; `values` itself is not modified.
+        """
+        casted_examples = []
+        for example in values:
+            casted_examples.append(
+                self._cast_example_to_type(property_id, expected_type, example)
+            )
+        return casted_examples
+
     def jsonschema_object(
-        self, property_id: str, property_feature_key: str, value: any
+        self,
+        property_id: str,
+        property_feature_key: str,
+        value: any,
+        expected_type: str | None = None,
     ) -> dict[str, any]:
         """
         Process a property keyword with their value and return a dictionary with fields for a property.
@@ -487,15 +559,25 @@ def jsonschema_object(
                     jsonschema_value[key] = value
             # FIXME multiple examples will always be loaded as str, regardless of actual type
             case "examples", str(value):
-                jsonschema_value = {property_feature_key: value.split("; ")}
+                parsed_examples = value.split("; ")
+                parsed_examples = self._cast_examples_to_declared_type(
+                    property_id, expected_type, parsed_examples
+                )
+                jsonschema_value = {property_feature_key: parsed_examples}
             case "examples", datetime():
                 value = value.strftime("%Y-%m-%dT%H:%M:%S")
                 value = value.replace("T00:00:00", "")
-                jsonschema_value = {property_feature_key: [value]}
+                parsed_examples = self._cast_examples_to_declared_type(
+                    property_id, expected_type, [value]
+                )
+                jsonschema_value = {property_feature_key: parsed_examples}
             case "examples", int(value) | float(value):
                 value = float(value)
-                value = [int(value) if value.is_integer() else value]
-                jsonschema_value = {property_feature_key: value}
+                parsed_examples = [int(value) if value.is_integer() else value]
+                parsed_examples = self._cast_examples_to_declared_type(
+                    property_id, expected_type, parsed_examples
+                )
+                jsonschema_value = {property_feature_key: parsed_examples}
             case "enum", str():
                 jsonschema_value = {"$ref": f"#/$defs/enums/{property_id}"}
             case _, value if not pd.isna(value):
@@ -532,6 +614,14 @@ def handle_properties(self, json_data: dict[str, dict]) -> tuple[dict, dict, dic
         for property_id, db_features_dic in json_data.items():
             is_required = db_features_dic.get("required (Y/N)", "") == "Y"
             has_enum = db_features_dic.get("enum", False)
+            if property_id in [
+                "collecting_institution",
+                "submitting_institution",
+                "sequencing_institution",
+            ]:
+                lab_values = self._lab_uniques.get(property_id, [])
+                if lab_values:
+                    has_enum = "; ".join(lab_values)
 
             # Create empty placeholder
             schema_property[property_id] = {}
@@ -569,6 +659,7 @@ def handle_properties(self, json_data: dict[str, dict]) -> tuple[dict, dict, dic
                             property_id,
                             mapping_features[db_feature_key],
                             db_feature_value,
+                            expected_type=db_features_dic.get("type"),
                         )
                         if std_json_feature:
                             schema_property[property_id].update(std_json_feature)

diff --git a/relecov_tools/conf/configuration.json b/relecov_tools/conf/configuration.json
@@ -88,6 +88,7 @@
             "required_copy_from_other_field"
         ],
         "schema_file": "relecov_schema.json",
+        "cast_values_from_schema": false,
         "unique_sample_id": "sequencing_sample_id",
         "fixed_fields": {
             "study_type": "Whole Genome Sequencing",
@@ -180,7 +181,7 @@
     },
     "upload_to_ena": {
         "ENA_configuration": {},
-        "checklist": "",
+        "checklist": "default_checklist",
         "templates_path": "",
         "tool": {
             "tool_name": "ena-upload-cli",

diff --git a/relecov_tools/conf/initial_config.yaml b/relecov_tools/conf/initial_config.yaml
diff --git a/relecov_tools/conf/read_lab_metadata_heading_default.json b/relecov_tools/conf/read_lab_metadata_heading_default.json
@@ -0,0 +1,57 @@
+[
+  "Organism",
+  "Public Health sample id (SIVIRA)",
+  "Sample ID given by originating laboratory",
+  "Sample ID given by the submitting laboratory",
+  "Sample ID given in the microbiology lab",
+  "Sample ID given if multiple rna-extraction or passages",
+  "Sample ID given for sequencing",
+  "ENA Sample ID",
+  "GISAID Virus Name",
+  "GISAID id",
+  "Originating Laboratory",
+  "Submitting Institution",
+  "Sequencing Institution",
+  "Sample Collection Date",
+  "Sample Received Date",
+  "Purpose of sampling",
+  "Biological Sample Storage Condition",
+  "Specimen source",
+  "Environmental Material",
+  "Environmental System",
+  "Collection Device",
+  "Host",
+  "Host Age Years",
+  "Host Age Months",
+  "Host Gender",
+  "Vaccinated",
+  "Specific medication for treatment or prophylaxis",
+  "Hospitalization",
+  "Admission to intensive care unit",
+  "Death",
+  "Immunosuppression",
+  "Sequencing Date",
+  "Nucleic acid extraction protocol",
+  "Commercial All-in-one library kit",
+  "Library Preparation Kit",
+  "Enrichment Protocol",
+  "If Enrichment Protocol Is Other, Specify",
+  "Enrichment panel/assay",
+  "If Enrichment panel/assay Is Other, Specify",
+  "Enrichment panel/assay version",
+  "Number Of Samples In Run",
+  "Runid",
+  "Sequencing Instrument Model",
+  "Flowcell Kit",
+  "Source material",
+  "Capture method",
+  "Sequencing technique",
+  "Library Layout",
+  "Gene Name 1",
+  "Diagnostic Pcr Ct Value 1",
+  "Gene Name 2",
+  "Diagnostic Pcr Ct Value-2",
+  "Authors",
+  "Sequence file R1",
+  "Sequence file R2"
+]