Commit 66ddefb

Override data_type from dataset (#376)
* Don't always run coverage after tests
* Always override the data_type from the dataset

  It is crucial that the data type always exactly corresponds to the physical dataset, so we ensure that it is overridden every time we work with metadata. As part of this change we also perform dataset consistency checks, even when we're not merging. This will assist in maintaining consistency between dataset and metadata.

* Fix logic
* Create test datasets on the fly
* Fix tests
* Fix mypy
* Add test for overriding data types
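
To make the central rule concrete, here is a minimal runnable sketch of the merge behaviour described above. `Var` and `merge_vars` are hypothetical simplifications for illustration, not the library's models; the actual logic lives in `merge_variables` in `_merge.py` (see the diff below).

```python
# Illustrative sketch only: Var and merge_vars are hypothetical stand-ins
# for the library's Variable model and merge_variables function.
from dataclasses import dataclass


@dataclass
class Var:
    short_name: str
    data_type: str | None = None


def merge_vars(extracted: list[Var], existing: list[Var]) -> list[Var]:
    """Keep the documented metadata per variable, but always take
    data_type from the extracted (physical) dataset."""
    by_name = {v.short_name: v for v in existing}
    merged = []
    for ext in extracted:
        if (old := by_name.get(ext.short_name)) is not None:
            old.data_type = ext.data_type  # the dataset always wins
            merged.append(old)
        else:
            merged.append(ext)
    return merged


# A stale "STRING" in the metadata document is corrected to the dataset's type.
assert merge_vars([Var("age", "INTEGER")], [Var("age", "STRING")])[0].data_type == "INTEGER"
```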
1 parent 9c92711 commit 66ddefb

23 files changed (+739, −207 lines)

noxfile.py

Lines changed: 10 additions & 14 deletions

```diff
@@ -73,20 +73,16 @@ def mypy(session: nox.Session) -> None:
 def tests(session: nox.Session) -> None:
     """Run the test suite."""
     install_with_uv(session, groups=["test"])
-    try:
-        session.run(
-            "coverage",
-            "run",
-            "--parallel",
-            "-m",
-            "pytest",
-            "-o",
-            "pythonpath=",
-            *session.posargs,
-        )
-    finally:
-        if session.interactive:
-            session.notify("coverage", posargs=[])
+    session.run(
+        "coverage",
+        "run",
+        "--parallel",
+        "-m",
+        "pytest",
+        "-o",
+        "pythonpath=",
+        *session.posargs,
+    )
 
 
 @nox.session(python=python_versions[-1], default=False)
```
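
With the `try`/`finally` removed, the session no longer auto-notifies the coverage session; `coverage run` simply wraps pytest every time. Arguments after `--` are still forwarded to pytest through `*session.posargs`, e.g. `nox -s tests -- -k merge` (standard nox behaviour).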

src/dapla_metadata/datasets/_merge.py

Lines changed: 53 additions & 27 deletions

```diff
@@ -9,7 +9,6 @@
 make changes as appropriate.
 """
 
-import copy
 import logging
 import warnings
 from collections.abc import Iterable
@@ -42,7 +41,6 @@
 )
 VARIABLE_RENAME_MESSAGE = "Variables have been renamed in the dataset"
 VARIABLE_ORDER_MESSAGE = "The order of variables in the dataset has changed"
-VARIABLE_DATATYPES_MESSAGE = "Variable datatypes differ"
 VARIABLES_FEWER_MESSAGE = "Dataset has fewer variables than defined in metadata"
 
 
@@ -164,13 +162,6 @@ def check_variables_consistency(
                 == [v.short_name or "" for v in existing_variables],
             )
         )
-        results.append(
-            DatasetConsistencyStatus(
-                message=VARIABLE_DATATYPES_MESSAGE,
-                success=[v.data_type for v in extracted_variables]
-                == [v.data_type for v in existing_variables],
-            )
-        )
     else:
         results.extend(
             [
@@ -252,6 +243,7 @@ def merge_variables(
     existing_metadata: OptionalDatadocMetadataType,
     extracted_metadata: all_optional_model.DatadocMetadata,
     merged_metadata: all_optional_model.DatadocMetadata,
+    explicitly_defined_metadata_document: bool = True,
 ) -> all_optional_model.DatadocMetadata:
     """Merges variables from the extracted metadata into the existing metadata and updates the merged metadata.
 
@@ -264,6 +256,9 @@
         existing_metadata: The metadata object containing the current state of variables.
         extracted_metadata: The metadata object containing new or updated variables to merge.
         merged_metadata: The metadata object that will contain the result of the merge.
+        explicitly_defined_metadata_document: True when the user has supplied a path to a metadata document in addition to
+            the dataset. This is done when re-using metadata from another dataset for convenience. There are some differences
+            in behaviour in this case.
 
     Returns:
         all_optional_model.DatadocMetadata: The `merged_metadata` object containing variables from both `existing_metadata`
@@ -277,24 +272,26 @@
         and merged_metadata.variables is not None
     ):
         for extracted in extracted_metadata.variables:
-            existing = next(
+            if existing := next(
                 (
                     existing
                     for existing in existing_metadata.variables
                     if existing.short_name == extracted.short_name
                 ),
                 None,
-            )
-            if existing:
-                existing.id = (
-                    None  # Set to None so that it will be set assigned a fresh ID later
-                )
+            ):
+                if explicitly_defined_metadata_document:
+                    # In this case we're transferring metadata to a new dataset so we must
+                    # assign a new ID
+                    existing.id = None  # Set to None so that it will be assigned a fresh ID later
                 existing.contains_data_from = (
                     extracted.contains_data_from or existing.contains_data_from
                 )
                 existing.contains_data_until = (
                     extracted.contains_data_until or existing.contains_data_until
                 )
+                # We must ensure that the data type always corresponds to the dataset.
+                existing.data_type = extracted.data_type
                 merged_metadata.variables.append(
                     cast("datadoc_model.all_optional.model.Variable", existing)
                 )
@@ -307,31 +304,60 @@
 def merge_metadata(
     extracted_metadata: all_optional_model.DatadocMetadata | None,
     existing_metadata: OptionalDatadocMetadataType,
+    explicitly_defined_metadata_document: bool = True,
 ) -> all_optional_model.DatadocMetadata:
-    if not existing_metadata:
+    """Merge metadata extracted from a dataset with existing metadata from a metadata document.
+
+    There are two cases this function can handle:
+    1. When the user has explicitly supplied the path to another metadata document. This is
+       convenience functionality to allow metadata to be reused and copied from one dataset
+       to another. In this case there is an explicit list of fields which are to be copied
+       over, defined in `DATASET_FIELDS_FROM_EXISTING_METADATA`. We also want to create new
+       IDs for the dataset and all variables.
+    2. When the metadata document belongs to the dataset itself. In this case the existing
+       metadata is used as the base and values derived from the physical dataset, such as
+       the data type, are overridden.
+
+    Args:
+        existing_metadata: The metadata object containing the current state of variables.
+        extracted_metadata: The metadata object containing new or updated variables to merge.
+        explicitly_defined_metadata_document: True when the user has supplied a path to a metadata document in addition to
+            the dataset. This is done when re-using metadata from another dataset for convenience. There are some differences
+            in behaviour in this case.
+
+    Returns:
+        all_optional_model.DatadocMetadata: The `merged_metadata` resulting from merging `existing_metadata`
+            and `extracted_metadata`.
+    """
+    if not existing_metadata or not existing_metadata.dataset:
         logger.warning(
             "No existing metadata found, no merge to perform. Continuing with extracted metadata.",
         )
         return extracted_metadata or all_optional_model.DatadocMetadata()
 
-    if not extracted_metadata:
+    if not extracted_metadata or not extracted_metadata.dataset:
         return cast("all_optional_model.DatadocMetadata", existing_metadata)
 
-    # Use the extracted metadata as a base
-    merged_metadata = all_optional_model.DatadocMetadata(
-        dataset=copy.deepcopy(extracted_metadata.dataset),
-        variables=[],
-    )
-
-    override_dataset_fields(
-        merged_metadata=merged_metadata,
-        existing_metadata=existing_metadata,
-    )
+    if explicitly_defined_metadata_document:
+        # Use the extracted metadata as a base
+        merged_metadata = all_optional_model.DatadocMetadata(
+            dataset=extracted_metadata.dataset.model_dump(),
+            variables=[],
+        )
+        override_dataset_fields(
+            merged_metadata=merged_metadata,
+            existing_metadata=existing_metadata,
+        )
+    else:
+        # Use the existing metadata as a base
+        merged_metadata = all_optional_model.DatadocMetadata(
+            dataset=existing_metadata.dataset.model_dump(),
+            variables=[],
+        )
 
     # Merge variables.
     # For each extracted variable, copy existing metadata into the merged metadata
     return merge_variables(
         existing_metadata=existing_metadata,
         extracted_metadata=extracted_metadata,
         merged_metadata=merged_metadata,
+        explicitly_defined_metadata_document=explicitly_defined_metadata_document,
     )
```
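
Two details worth noting in this diff: the `if existing := next(...)` walrus expression folds the lookup and the `None` check into one statement, and `copy.deepcopy(...)` is replaced by `dataset.model_dump()`. Passing the dumped dict to the `DatadocMetadata` constructor lets pydantic re-validate it into a fresh `Dataset` instance, which presumably gives the same isolation as the deep copy while letting the `import copy` be dropped.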

src/dapla_metadata/datasets/core.py

Lines changed: 22 additions & 24 deletions

```diff
@@ -35,7 +35,6 @@
 from dapla_metadata.datasets.utility.constants import (
     DEFAULT_SPATIAL_COVERAGE_DESCRIPTION,
 )
-from dapla_metadata.datasets.utility.constants import METADATA_DOCUMENT_FILE_SUFFIX
 from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_DATASET_FIELDS
 from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_VARIABLES_FIELDS
 from dapla_metadata.datasets.utility.urn import convert_uris_to_urns
@@ -45,6 +44,8 @@
 from dapla_metadata.datasets.utility.utils import PseudonymizationType
 from dapla_metadata.datasets.utility.utils import VariableListType
 from dapla_metadata.datasets.utility.utils import VariableType
+from dapla_metadata.datasets.utility.utils import build_dataset_path
+from dapla_metadata.datasets.utility.utils import build_metadata_document_path
 from dapla_metadata.datasets.utility.utils import calculate_percentage
 from dapla_metadata.datasets.utility.utils import derive_assessment_from_state
 from dapla_metadata.datasets.utility.utils import get_timestamp_now
@@ -143,7 +144,7 @@ def __init__(
         if dataset_path:
             self.dataset_path = UPath(dataset_path)
             if not metadata_document_path:
-                self.metadata_document = self.build_metadata_document_path(
+                self.metadata_document = build_metadata_document_path(
                     self.dataset_path,
                 )
         if metadata_document_path or dataset_path:
@@ -188,13 +189,14 @@ def _extract_metadata_from_files(self) -> None:
             and self.metadata_document
             and extracted_metadata
             and existing_metadata
-        ) and self.explicitly_defined_metadata_document:
-            self.dataset_consistency_status.extend(
-                check_dataset_consistency(
-                    self.dataset_path,
-                    self.metadata_document,
+        ):
+            if extracted_metadata.dataset and existing_metadata.dataset:
+                self.dataset_consistency_status.extend(
+                    check_dataset_consistency(
+                        UPath(str(extracted_metadata.dataset.file_path)),
+                        UPath(str(existing_metadata.dataset.file_path)),
+                    )
                 )
-            )
             self.dataset_consistency_status.extend(
                 check_variables_consistency(
                     extracted_metadata.variables or [],
@@ -209,10 +211,10 @@ def _extract_metadata_from_files(self) -> None:
             merged_metadata = merge_metadata(
                 extracted_metadata,
                 existing_metadata,
+                explicitly_defined_metadata_document=self.explicitly_defined_metadata_document,
             )
-            # We need to override this so that the document gets saved to the correct
-            # location, otherwise we would overwrite the existing document!
-            self.metadata_document = self.build_metadata_document_path(
+            # Ensure the document path corresponds to the dataset path
+            self.metadata_document = build_metadata_document_path(
                 self.dataset_path,
             )
             self._set_metadata(merged_metadata)
@@ -305,9 +307,15 @@ def _extract_metadata_from_existing_document(
                 datadoc_metadata = fresh_metadata
             if datadoc_metadata is None:
                 return None
-            return self.metadata_model.DatadocMetadata.model_validate_json(
+            existing = self.metadata_model.DatadocMetadata.model_validate_json(
                 json.dumps(datadoc_metadata),
             )
+
+            # Always override the stored dataset path to ensure it matches
+            if existing.dataset:
+                existing.dataset.file_path = str(
+                    self.dataset_path or build_dataset_path(document)
+                )
         except json.JSONDecodeError:
             logger.warning(
                 "Could not open existing metadata file %s. \
@@ -316,6 +324,8 @@ def _extract_metadata_from_existing_document(
                 exc_info=True,
             )
             return None
+        else:
+            return existing
 
     def _extract_subject_field_from_path(
         self,
@@ -403,18 +413,6 @@ def _extract_metadata_from_dataset(
         )
         return metadata
 
-    @staticmethod
-    def build_metadata_document_path(
-        dataset_path: ReadablePathLike,
-    ) -> UPath:
-        """Build the path to the metadata document corresponding to the given dataset.
-
-        Args:
-            dataset_path: Path to the dataset we wish to create metadata for.
-        """
-        dataset_path = UPath(dataset_path)
-        return dataset_path.parent / (dataset_path.stem + METADATA_DOCUMENT_FILE_SUFFIX)
-
     def datadoc_model(
         self,
     ) -> all_optional_model.MetadataContainer | required_model.MetadataContainer:
```
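
The `try`/`except`/`else` shape in `_extract_metadata_from_existing_document` means the `JSONDecodeError` path still returns `None`, while the happy path returns the validated metadata from the `else` clause, now with `dataset.file_path` always overridden to match the dataset actually being opened (falling back to `build_dataset_path(document)` when no dataset path was supplied).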

src/dapla_metadata/datasets/utility/constants.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -94,6 +94,8 @@
 
 METADATA_DOCUMENT_FILE_SUFFIX = "__DOC.json"
 
+PARQUET_DATASET_FILE_EXTENSION = ".parquet"
+
 PAPIS_STABLE_IDENTIFIER_TYPE = "FREG_SNR"
 PAPIS_ENCRYPTION_KEY_REFERENCE = "papis-common-key-1"
 DAEAD_ENCRYPTION_KEY_REFERENCE = "ssb-common-key-1"
```

src/dapla_metadata/datasets/utility/utils.py

Lines changed: 28 additions & 0 deletions

```diff
@@ -24,6 +24,7 @@
 from dapla_metadata.datasets.utility.constants import ENCRYPTION_PARAMETER_SNAPSHOT_DATE
 from dapla_metadata.datasets.utility.constants import ENCRYPTION_PARAMETER_STRATEGY
 from dapla_metadata.datasets.utility.constants import ENCRYPTION_PARAMETER_STRATEGY_SKIP
+from dapla_metadata.datasets.utility.constants import METADATA_DOCUMENT_FILE_SUFFIX
 from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_VARIABLES_FIELDS
 from dapla_metadata.datasets.utility.constants import (
     OBLIGATORY_DATASET_METADATA_IDENTIFIERS,
@@ -42,6 +43,7 @@
 )
 from dapla_metadata.datasets.utility.constants import PAPIS_ENCRYPTION_KEY_REFERENCE
 from dapla_metadata.datasets.utility.constants import PAPIS_STABLE_IDENTIFIER_TYPE
+from dapla_metadata.datasets.utility.constants import PARQUET_DATASET_FILE_EXTENSION
 from dapla_metadata.datasets.utility.enums import EncryptionAlgorithm
 
 if TYPE_CHECKING:
@@ -601,3 +603,29 @@ def read_variables_from_metadata_document(
     ]
 
     return metadata_document_variables
+
+
+def build_metadata_document_path(
+    dataset_path: ReadablePathLike,
+) -> UPath:
+    """Build the path to the metadata document corresponding to the given dataset.
+
+    Args:
+        dataset_path: Path to the dataset we wish to create metadata for.
+    """
+    dataset_path = UPath(dataset_path)
+    return dataset_path.parent / (dataset_path.stem + METADATA_DOCUMENT_FILE_SUFFIX)
+
+
+def build_dataset_path(
+    metadata_document_path: ReadablePathLike,
+) -> UPath:
+    """Build the path to the dataset corresponding to the given metadata document.
+
+    Args:
+        metadata_document_path: Path to the existing metadata document.
+    """
+    return UPath(
+        str(metadata_document_path).replace(METADATA_DOCUMENT_FILE_SUFFIX, "")
+        + PARQUET_DATASET_FILE_EXTENSION
+    )
```
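
A quick round-trip sketch of the two new helpers; the filename is invented, and the import paths are the ones added to core.py above:

```python
from dapla_metadata.datasets.utility.utils import build_dataset_path
from dapla_metadata.datasets.utility.utils import build_metadata_document_path

# person_2024.parquet is a hypothetical dataset name.
doc = build_metadata_document_path("data/person_2024.parquet")
print(doc)  # data/person_2024__DOC.json

dataset = build_dataset_path(doc)
print(dataset)  # data/person_2024.parquet
```

Since `build_dataset_path` appends the hard-coded `PARQUET_DATASET_FILE_EXTENSION`, the reverse mapping only round-trips for Parquet datasets.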
