diff --git a/airbyte_cdk/connector_builder/connector_builder_handler.py b/airbyte_cdk/connector_builder/connector_builder_handler.py
index e63c200c1..0f41cab1e 100644
--- a/airbyte_cdk/connector_builder/connector_builder_handler.py
+++ b/airbyte_cdk/connector_builder/connector_builder_handler.py
@@ -117,11 +117,15 @@ def read_stream(
             ),
         )
     except Exception as exc:
+        # message: user-friendly error for display (stream name + exception text only).
+        # NOTE(review): assumes stream_name is already bound when the try block raises; confirm.
         error = AirbyteTracedException.from_exception(
             exc,
-            message=filter_secrets(
-                f"Error reading stream with config={config} and catalog={configured_catalog}: {str(exc)}"
-            ),
+            message=filter_secrets(f"Error reading stream {stream_name}: {str(exc)}"),
+        )
+        # Override internal_message with technical details for debugging (incl. config/catalog)
+        error.internal_message = filter_secrets(
+            f"Error reading stream {stream_name} with config={config} and catalog={configured_catalog}: {str(exc)}"
         )
         return error.as_airbyte_message()
 
diff --git a/unit_tests/connector_builder/test_connector_builder_handler.py b/unit_tests/connector_builder/test_connector_builder_handler.py
index 6f0394407..4b716113c 100644
--- a/unit_tests/connector_builder/test_connector_builder_handler.py
+++ b/unit_tests/connector_builder/test_connector_builder_handler.py
@@ -1408,6 +1408,75 @@ def test_read_stream_exception_with_secrets():
         assert "super_secret_key" not in response.trace.error.message
 
 
+def test_read_stream_error_message_does_not_contain_config_and_catalog():
+    """
+    Test that error messages in read_stream are clean and user-friendly,
+    without embedding verbose config and catalog information.
+
+    This test verifies that:
+    1. The user-facing `message` is clean and doesn't contain config/catalog dumps
+    2. The technical `internal_message` still contains full context for debugging
+    """
+    # Config and catalog carry marker strings so a leak into a message is detectable
+    config = {
+        "__injected_declarative_manifest": "test_manifest",
+        "verbose_config_data": "this_should_not_appear_in_user_message",
+        "api_key": "secret_key_value",
+    }
+    catalog = ConfiguredAirbyteCatalog(
+        streams=[
+            ConfiguredAirbyteStream(
+                stream=AirbyteStream(
+                    name=_stream_name,
+                    json_schema={"properties": {"verbose_catalog_schema": {"type": "string"}}},
+                    supported_sync_modes=[SyncMode.full_refresh],
+                ),
+                sync_mode=SyncMode.full_refresh,
+                destination_sync_mode=DestinationSyncMode.append,
+            )
+        ]
+    )
+    state = []
+    limits = TestLimits()
+
+    # Stand-in source; run_test_read is patched below, so it should not be exercised
+    mock_source = MagicMock()
+
+    with patch(
+        "airbyte_cdk.connector_builder.test_reader.TestReader.run_test_read"
+    ) as mock_handler:
+        # Simulate a common error like a datetime parsing error
+        mock_handler.side_effect = ValueError(
+            "time data '' does not match format '%Y-%m-%dT%H:%M:%SZ'"
+        )
+
+        # read_stream is expected to catch the error and return it as a trace message
+        response = read_stream(mock_source, config, catalog, state, limits)
+
+        # Verify it's a trace message with an error
+        assert response.type == Type.TRACE
+        assert response.trace.type.value == "ERROR"
+
+        # The user-facing message should be clean - no config or catalog dumps
+        user_message = response.trace.error.message
+        assert "verbose_config_data" not in user_message
+        assert "verbose_catalog_schema" not in user_message
+        assert "__injected_declarative_manifest" not in user_message
+
+        # It must be exactly the stream name followed by the original exception text
+        stream_name = catalog.streams[0].stream.name
+        assert (
+            user_message
+            == f"Error reading stream {stream_name}: time data '' does not match format '%Y-%m-%dT%H:%M:%SZ'"
+        )
+
+        # The internal message should contain technical details for debugging
+        internal_message = response.trace.error.internal_message
+        assert "verbose_config_data" in internal_message
+        assert "verbose_catalog_schema" in internal_message
+        assert f"Error reading stream {stream_name} with config=" in internal_message
+
+
 def test_full_resolve_manifest(valid_resolve_manifest_config_file):
     config = copy.deepcopy(RESOLVE_DYNAMIC_STREAM_MANIFEST_CONFIG)
     command = config["__command"]