
Commit 00a7dfe

Author: Lingling Peng (committed)
Merge branch 'develop' into synpy-1674-address-feedbacks
2 parents: db94d96 + 4361257

File tree: 27 files changed (+1904, -127 lines)

.github/workflows/build.yml

Lines changed: 2 additions & 2 deletions

@@ -51,7 +51,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-22.04, macos-13, windows-2022]
+        os: [ubuntu-22.04, macos-15-intel, windows-2022]

         # if changing the below change the run-integration-tests versions and the check-deploy versions
         # Make sure that we are running the integration tests on the first and last versions of the matrix
@@ -486,7 +486,7 @@ jobs:

     strategy:
       matrix:
-        os: [ubuntu-24.04, macos-13, windows-2022]
+        os: [ubuntu-24.04, macos-15-intel, windows-2022]

         # python versions should be consistent with the strategy matrix and the runs-integration-tests versions
         python: ["3.10", "3.11", "3.12", "3.13", "3.14"]

docs/guides/extensions/curator/metadata_curation.md

Lines changed: 254 additions & 0 deletions

@@ -10,6 +10,7 @@ By following this guide, you will:
 - Create a metadata curation workflow with automatic validation
 - Set up either file-based or record-based metadata collection
 - Configure curation tasks that guide collaborators through metadata entry
+- Retrieve and analyze detailed validation results to identify data quality issues

 ## Prerequisites

@@ -178,6 +179,256 @@ print(f" EntityView: {entity_view_id}")
 print(f" CurationTask: {task_id}")
 ```

+## Step 4: Work with metadata and validate (Record-based workflow)
+
+After creating a record-based metadata task, collaborators can enter metadata through the Grid interface. Once metadata entry is complete, you'll want to validate the data against your schema and identify any issues.
+
+### The metadata curation workflow
+
+1. **Data Entry**: Collaborators use the Grid interface (via the curation task link in the Synapse web UI) to enter metadata
+2. **Grid Export**: Export the Grid session back to the RecordSet to save changes (this can be done via the web UI or programmatically)
+3. **Validation**: Retrieve detailed validation results to identify schema violations
+4. **Correction**: Fix any validation errors and repeat as needed
+
+### Creating and exporting a Grid session
+
+Validation results are only generated when a Grid session is exported back to the RecordSet. This triggers Synapse to validate each row against the bound schema. You have two options:
+
+**Option A: Via the Synapse web UI (most common)**
+
+Users can access the curation task through the Synapse web interface, enter/edit data in the Grid, and click the export button. This automatically generates validation results.
+
+**Option B: Programmatically create and export a Grid session**
+
+```python
+from synapseclient import Synapse
+from synapseclient.models import RecordSet
+from synapseclient.models.curation import Grid
+
+syn = Synapse()
+syn.login()
+
+# Get your RecordSet (must have a schema bound)
+record_set = RecordSet(id="syn987654321").get()
+
+# Create a Grid session from the RecordSet
+grid = Grid(record_set_id=record_set.id).create()
+
+# At this point, users can interact with the Grid (either programmatically or via web UI).
+# When ready to save changes and generate validation results, export back to the RecordSet.
+grid.export_to_record_set()
+
+# Clean up the Grid session
+grid.delete()
+
+# Re-fetch the RecordSet to get the updated validation_file_handle_id
+record_set = RecordSet(id=record_set.id).get()
+```
+
+**Important**: The `validation_file_handle_id` attribute is only populated after a Grid export operation. Until then, `get_detailed_validation_results()` will return `None`.
+
+### Getting detailed validation results
+
+After exporting from a Grid session with a bound schema, Synapse automatically validates each row against the schema and generates a detailed validation report. Here's how to retrieve and analyze those results:
+
+```python
+from synapseclient import Synapse
+from synapseclient.models import RecordSet
+
+syn = Synapse()
+syn.login()
+
+# After Grid export (either via web UI or programmatically),
+# retrieve the updated RecordSet
+record_set = RecordSet(id="syn987654321").get()
+
+# Get detailed validation results as a pandas DataFrame
+validation_results = record_set.get_detailed_validation_results()
+
+if validation_results is not None:
+    print(f"Total rows validated: {len(validation_results)}")
+
+    # Filter for valid and invalid rows
+    valid_rows = validation_results[validation_results['is_valid'] == True]
+    invalid_rows = validation_results[validation_results['is_valid'] == False]
+
+    print(f"Valid rows: {len(valid_rows)}")
+    print(f"Invalid rows: {len(invalid_rows)}")
+
+    # Display details of any validation errors
+    if len(invalid_rows) > 0:
+        print("\nRows with validation errors:")
+        for idx, row in invalid_rows.iterrows():
+            print(f"\nRow {row['row_index']}:")
+            print(f"  Error: {row['validation_error_message']}")
+            print(f"  ValidationError: {row['all_validation_messages']}")
+else:
+    print("No validation results available. The Grid session must be exported to generate validation results.")
+```
+
+### Example: Complete validation workflow for animal study metadata
+
+This example demonstrates the full workflow from creating a curation task through validating the submitted metadata:
+
+```python
+from synapseclient import Synapse
+from synapseclient.extensions.curator import create_record_based_metadata_task, query_schema_registry
+from synapseclient.models import RecordSet
+from synapseclient.models.curation import Grid
+import pandas as pd
+import tempfile
+import os
+import time
+
+syn = Synapse()
+syn.login()
+
+# Step 1: Find the schema
+schema_uri = query_schema_registry(
+    synapse_client=syn,
+    dcc="ad",
+    datatype="IndividualAnimalMetadataTemplate"
+)
+
+# Step 1.5: Create initial test data with validation examples
+# Row 1: VALID - all required fields present and valid
+# Row 2: INVALID - missing required field 'genotype'
+# Row 3: INVALID - invalid enum value for 'sex' ("other" not in enum)
+test_data = pd.DataFrame({
+    "individualID": ["ANIMAL001", "ANIMAL002", "ANIMAL003"],
+    "species": ["Mouse", "Mouse", "Mouse"],
+    "sex": ["female", "male", "other"],  # Row 3: invalid enum
+    "genotype": ["5XFAD", None, "APOE4KI"],  # Row 2: missing required field
+    "genotypeBackground": ["C57BL/6J", "C57BL/6J", "C57BL/6J"],
+    "modelSystemName": ["5XFAD", "5XFAD", "APOE4KI"],
+    "dateBirth": ["2024-01-15", "2024-02-20", "2024-03-10"],
+    "individualIdSource": ["JAX", "JAX", "JAX"],
+})
+
+# Create a temporary CSV file with the test data
+temp_fd, temp_csv = tempfile.mkstemp(suffix=".csv")
+os.close(temp_fd)
+test_data.to_csv(temp_csv, index=False)
+
+# Step 2: Create the curation task (this creates an empty template RecordSet)
+record_set, curation_task, data_grid = create_record_based_metadata_task(
+    synapse_client=syn,
+    project_id="syn123456789",
+    folder_id="syn987654321",
+    record_set_name="AnimalMetadata_Records",
+    record_set_description="Animal study metadata with validation",
+    curation_task_name="AnimalMetadata_Validation_Example",
+    upsert_keys=["individualID"],
+    instructions="Enter metadata for each animal. All required fields must be completed.",
+    schema_uri=schema_uri,
+    bind_schema_to_record_set=True,
+)
+
+# Give the newly created task resources a moment to become available
+time.sleep(10)
+
+print(f"Curation task created with ID: {curation_task.task_id}")
+print(f"RecordSet created with ID: {record_set.id}")
+
+# Step 2.5: Upload the test data to the RecordSet
+record_set = RecordSet(id=record_set.id).get(synapse_client=syn)
+print("\nUploading test data to RecordSet...")
+record_set.path = temp_csv
+record_set = record_set.store(synapse_client=syn)
+print(f"Test data uploaded to RecordSet {record_set.id}")
+
+# Step 3: Collaborators enter data via the web UI, OR you can create/export a Grid programmatically.
+# For demonstration, here's the programmatic approach:
+print("\nCreating Grid session for data entry...")
+grid = Grid(record_set_id=record_set.id).create()
+print("Grid session created. Users can now enter data.")
+
+# After data entry is complete (either via web UI or programmatically),
+# export the Grid to generate validation results
+print("\nExporting Grid to RecordSet to generate validation results...")
+grid.export_to_record_set()
+
+# Clean up the Grid session
+grid.delete()
+print("Grid session exported and deleted.")
+
+# Step 4: Refresh the RecordSet to get the latest validation results
+print("\nRefreshing RecordSet to retrieve validation results...")
+record_set = RecordSet(id=record_set.id).get()
+
+# Step 5: Analyze validation results
+validation_df = record_set.get_detailed_validation_results()
+
+if validation_df is not None:
+    # Summary statistics
+    total_rows = len(validation_df)
+    valid_count = (validation_df['is_valid'] == True).sum()  # noqa: E712
+    invalid_count = (validation_df['is_valid'] == False).sum()  # noqa: E712
+
+    print("\n=== Validation Summary ===")
+    print(f"Total records: {total_rows}")
+    print(f"Valid records: {valid_count} ({valid_count}/{total_rows})")
+    print(f"Invalid records: {invalid_count} ({invalid_count}/{total_rows})")
+
+    # Group errors by type for better understanding
+    if invalid_count > 0:
+        invalid_rows = validation_df[validation_df['is_valid'] == False]  # noqa: E712
+
+        # Export detailed error report for review
+        error_report = invalid_rows[['row_index', 'validation_error_message', 'all_validation_messages']]
+        error_report_path = "validation_errors_report.csv"
+        error_report.to_csv(error_report_path, index=False)
+        print(f"\nDetailed error report saved to: {error_report_path}")
+
+        # Show first few errors as examples
+        print("\n=== Sample Validation Errors ===")
+        for idx, row in error_report.head(3).iterrows():
+            print(f"\nRow {row['row_index']}:")
+            print(f"  Error: {row['validation_error_message']}")
+            print(f"  ValidationError: {row['all_validation_messages']}")
+
+# Clean up temporary file
+if os.path.exists(temp_csv):
+    os.unlink(temp_csv)
+```
+
+Running this example, you would expect output like:
+
+```
+=== Sample Validation Errors ===
+
+Row 0:
+  Error: expected type: String, found: Long
+  ValidationError: ["#/dateBirth: expected type: String, found: Long"]
+
+Row 1:
+  Error: 2 schema violations found
+  ValidationError: ["#/genotype: expected type: String, found: Null","#/dateBirth: expected type: String, found: Long"]
+
+Row 2:
+  Error: 2 schema violations found
+  ValidationError: ["#/dateBirth: expected type: String, found: Long","#/sex: other is not a valid enum value"]
+```
+
+Note that even the row seeded as valid fails here: the `dateBirth` values appear to be ingested as numeric (`Long`) rather than `String`, so every row carries that violation in addition to the deliberately seeded errors.
+
+**Key points about validation results:**
+
+- **Automatic generation**: Validation results are created automatically when you export data from a Grid session with a bound schema
+- **Row-level detail**: Each row in your RecordSet gets its own validation status and error messages
+- **Multiple violations**: The `all_validation_messages` column contains all schema violations for a row, not just the first one
+- **Iterative correction**: Use the validation results to identify issues, make corrections in the Grid, export again, and re-validate (a sketch of this loop follows this file's diff)
+
+### When validation results are available
+
+Validation results are only available after:
+1. A JSON schema has been bound to the RecordSet (set `bind_schema_to_record_set=True` when creating the task)
+2. Data has been entered through a Grid session
+3. **The Grid session has been exported back to the RecordSet** - This is the critical step that triggers validation and populates the `validation_file_handle_id` attribute
+
+The export can happen in two ways:
+- **Via the Synapse web UI**: Users click the export/save button in the Grid interface
+- **Programmatically**: Call `grid.export_to_record_set()` after creating a Grid session
+
+If `get_detailed_validation_results()` returns `None`, the most common reason is that the Grid session hasn't been exported yet. Check that `record_set.validation_file_handle_id` is not `None` after exporting.
+
 ## Additional utilities

 ### Validate schema binding on folders

@@ -227,6 +478,9 @@ for curation_task in CurationTask.list(
 - [query_schema_registry][synapseclient.extensions.curator.query_schema_registry] - Search for schemas in the registry
 - [create_record_based_metadata_task][synapseclient.extensions.curator.create_record_based_metadata_task] - Create RecordSet-based curation workflows
 - [create_file_based_metadata_task][synapseclient.extensions.curator.create_file_based_metadata_task] - Create EntityView-based curation workflows
+- [RecordSet.get_detailed_validation_results][synapseclient.models.RecordSet.get_detailed_validation_results] - Get detailed validation results for RecordSet data
+- [Grid.create][synapseclient.models.curation.Grid.create] - Create a Grid session from a RecordSet
+- [Grid.export_to_record_set][synapseclient.models.curation.Grid.export_to_record_set] - Export Grid data back to RecordSet and generate validation results
 - [Folder.bind_schema][synapseclient.models.Folder.bind_schema] - Bind schemas to folders
 - [Folder.validate_schema][synapseclient.models.Folder.validate_schema] - Validate folder schema compliance
 - [CurationTask.list][synapseclient.models.CurationTask.list] - List curation tasks in a project
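
The guide's closing points describe an iterative loop (validate, correct in the Grid, re-export) and a `None` check on `validation_file_handle_id`. A minimal sketch combining both, using only the `RecordSet` and `Grid` calls shown in the guide above (the three-pass cap and the Synapse ID are illustrative placeholders):

```python
from synapseclient import Synapse
from synapseclient.models import RecordSet
from synapseclient.models.curation import Grid

syn = Synapse()
syn.login()

record_set = RecordSet(id="syn987654321").get()

# Validation results exist only after at least one Grid export.
if record_set.validation_file_handle_id is None:
    print("No validation results yet - export a Grid session first.")
else:
    for attempt in range(3):  # illustrative cap on correction passes
        results = record_set.get_detailed_validation_results()
        invalid = results[results["is_valid"] == False]  # noqa: E712
        if invalid.empty:
            print("All rows pass schema validation.")
            break
        print(f"Pass {attempt + 1}: {len(invalid)} invalid rows remain.")

        # Open a Grid session so collaborators can correct the flagged rows
        # (in practice the corrections happen in the Grid web UI here), then
        # export to re-run validation and clean up the session.
        grid = Grid(record_set_id=record_set.id).create()
        grid.export_to_record_set()
        grid.delete()

        # Re-fetch to pick up the refreshed validation results.
        record_set = RecordSet(id=record_set.id).get()
```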

docs/reference/experimental/async/curator.md

Lines changed: 1 addition & 0 deletions

@@ -25,6 +25,7 @@ at your own risk.
 - get_async
 - store_async
 - delete_async
+- get_detailed_validation_results_async
 - get_acl_async
 - get_permissions_async
 - set_permissions_async

docs/reference/experimental/sync/curator.md

Lines changed: 1 addition & 0 deletions

@@ -25,6 +25,7 @@ at your own risk.
 - get
 - store
 - delete
+- get_detailed_validation_results
 - get_acl
 - get_permissions
 - set_permissions
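
The reference lists above include async counterparts such as `get_detailed_validation_results_async`. A minimal sketch of the async variant, assuming these methods mirror the sync calls shown in the guide (the Synapse ID is a placeholder):

```python
import asyncio

from synapseclient import Synapse
from synapseclient.models import RecordSet


async def main() -> None:
    syn = Synapse()
    syn.login()

    # Assumed to mirror RecordSet(...).get() and get_detailed_validation_results()
    record_set = await RecordSet(id="syn987654321").get_async()
    results = await record_set.get_detailed_validation_results_async()
    if results is not None:
        print(f"Total rows validated: {len(results)}")


asyncio.run(main())
```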

synapseclient/core/typing_utils.py

Lines changed: 32 additions & 0 deletions

@@ -0,0 +1,32 @@
+"""Typing utilities for optional dependencies.
+
+This module provides type aliases for optional dependencies like pandas and numpy,
+allowing proper type checking without requiring these packages to be installed.
+"""
+
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    try:
+        from pandas import DataFrame, Series
+    except ImportError:
+        DataFrame = Any  # type: ignore[misc, assignment]
+        Series = Any  # type: ignore[misc, assignment]
+
+    try:
+        import numpy as np
+    except ImportError:
+        np = Any  # type: ignore[misc, assignment]
+
+    try:
+        import networkx as nx
+    except ImportError:
+        nx = Any  # type: ignore[misc, assignment]
+else:
+    # At runtime, use object as a placeholder
+    DataFrame = object
+    Series = object
+    np = object  # type: ignore[misc, assignment]
+    nx = object  # type: ignore[misc, assignment]
+
+__all__ = ["DataFrame", "Series", "np", "nx"]
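
A minimal sketch of how these aliases are meant to be used (annotations only, since at runtime they are plain `object` placeholders); the `row_count` function is hypothetical, not part of this commit:

```python
from typing import Optional

from synapseclient.core.typing_utils import DataFrame


def row_count(frame: Optional[DataFrame] = None) -> int:
    # Type checkers resolve DataFrame to pandas.DataFrame (or Any when
    # pandas is absent); at runtime it is just `object`, so never use it
    # with isinstance() or try to instantiate it.
    return 0 if frame is None else len(frame)
```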

synapseclient/core/upload/multipart_upload_async.py

Lines changed: 1 addition & 3 deletions

@@ -79,7 +79,6 @@
     Mapping,
     Optional,
     Tuple,
-    TypeVar,
     Union,
 )

@@ -107,6 +106,7 @@
 )
 from synapseclient.core.otel_config import get_tracer
 from synapseclient.core.retry import with_retry_time_based
+from synapseclient.core.typing_utils import DataFrame as DATA_FRAME_TYPE
 from synapseclient.core.upload.upload_utils import (
     copy_md5_fn,
     copy_part_request_body_provider_fn,
@@ -123,8 +123,6 @@
 if TYPE_CHECKING:
     from synapseclient import Synapse

-DATA_FRAME_TYPE = TypeVar("pd.DataFrame")
-
 # AWS limits
 MAX_NUMBER_OF_PARTS = 10000
 MIN_PART_SIZE = 5 * MB

synapseclient/core/upload/upload_utils.py

Lines changed: 2 additions & 2 deletions

@@ -3,9 +3,9 @@
 import math
 import re
 from io import BytesIO, StringIO
-from typing import Any, Dict, Optional, TypeVar, Union
+from typing import Any, Dict, Optional, Union

-DATA_FRAME_TYPE = TypeVar("pd.DataFrame")
+from synapseclient.core.typing_utils import DataFrame as DATA_FRAME_TYPE


 def get_partial_dataframe_chunk(
