Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions bin/fix_acceptance_tests_yml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/usr/bin/env python3
"""
Script to transform YAML acceptance tests structure.

This script transforms YAML files from the old format:

    tests:
      spec: [...]
      connection: [...]

To the new format:

    acceptance_tests:
      spec:
        tests: [...]
      connection:
        tests: [...]
"""

import yaml
import sys
from pathlib import Path
from typing import Dict, Any


class AlreadyUpdatedError(Exception):
    """Signal that a YAML file already carries an ``acceptance_tests`` key.

    Raised by ``transform`` so callers can tell "nothing to do" apart from
    a genuine failure.
    """


def transform(file_path: Path) -> None:
    """Rewrite one acceptance-test config from the old layout to the new one.

    The top-level ``tests`` mapping is removed and each of its entries is
    re-nested as ``acceptance_tests.<test_type>.tests``. The file is
    overwritten in place and a confirmation line is printed.

    Args:
        file_path: Path of the YAML file to rewrite.

    Raises:
        AlreadyUpdatedError: The document already has an ``acceptance_tests``
            key.
        ValueError: The document is empty, its root is not a mapping, it has
            no ``tests`` key, or ``tests`` is not a mapping.
        yaml.YAMLError: The file is not valid YAML.
    """
    # Read with an explicit encoding so the result does not depend on the
    # platform's default locale.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # An empty document loads as None, and a list/scalar root is not a
    # mapping; without these guards the key checks below would raise an
    # opaque TypeError (or silently do a substring/element test).
    if data is None:
        raise ValueError(f"Empty YAML file: {file_path}")
    if not isinstance(data, dict):
        raise ValueError(
            f"Unexpected YAML root type in {file_path}: "
            f"expected a mapping, got {type(data).__name__}"
        )

    if 'acceptance_tests' in data:
        raise AlreadyUpdatedError()

    if 'tests' not in data:
        raise ValueError(f"No 'tests' key found in {file_path}, skipping transformation")

    # Extract the tests data
    tests_data = data.pop('tests')

    if not isinstance(tests_data, dict):
        raise ValueError(f"Error: 'tests' key in {file_path} is not a dictionary")

    # Re-nest every test type under acceptance_tests.<test_type>.tests.
    # The new key is appended after the remaining top-level keys.
    data['acceptance_tests'] = {
        test_type: {'tests': test_content}
        for test_type, test_content in tests_data.items()
    }

    # Write back in block style, keeping the key order of the loaded document.
    # NOTE: a PyYAML load/dump round-trip does not preserve comments or the
    # original formatting of the file.
    with open(file_path, 'w', encoding='utf-8') as f:
        yaml.dump(data, f, default_flow_style=False, sort_keys=False, indent=2)

    print(f"Successfully transformed {file_path}")


def main():
    """Transform every source connector's acceptance-test config in a repo.

    Expects exactly one command-line argument: the path to the repository
    root. Every file matching
    ``airbyte-integrations/connectors/source-*/acceptance-test-config.yml``
    below that root is passed to ``transform``. A failure on one file is
    reported and does not stop the remaining files from being processed.

    Exits with status 1 when the argument count is wrong or the given path
    is not a directory.
    """
    if len(sys.argv) != 2:
        print("Usage: python fix_acceptance_tests_yml.py <airbyte_repo_path>", file=sys.stderr)
        sys.exit(1)

    repo_path = Path(sys.argv[1])

    # Globbing a missing directory yields nothing, which would otherwise make
    # a mistyped path look like a successful run with no files to change.
    if not repo_path.is_dir():
        print(f"Error: {repo_path} is not a directory", file=sys.stderr)
        sys.exit(1)

    pattern = 'airbyte-integrations/connectors/source-*/acceptance-test-config.yml'

    # Sort so files are processed (and reported) in a stable order.
    for file_path in sorted(repo_path.glob(pattern)):
        try:
            transform(file_path)
        except AlreadyUpdatedError:
            # Not a failure: the file is already in the new format.
            print(f"File {file_path} has already been updated, skipping transformation")
        except yaml.YAMLError as e:
            print(f"Error parsing YAML file {file_path}: {e}", file=sys.stderr)
        except Exception as e:
            # Best-effort batch run: report the file and continue with the rest.
            print(f"Error transforming {file_path}: {e}", file=sys.stderr)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Broaden file search to destinations and .yaml; print errors to stderr

Many connectors are destination-* and some configs use .yaml. Shall we cover both and route errors to stderr, wdyt?

-    for file_path in repo_path.glob('airbyte-integrations/connectors/source-*/acceptance-test-config.yml'):
-        try:
-            transform(file_path)
-        except AlreadyUpdatedError:
-            print(f"File {file_path} has already been updated, skipping transformation")
-        except yaml.YAMLError as e:
-            print(f"Error parsing YAML file {file_path}: {e}")
-        except Exception as e:
-            print(f"Error transforming {file_path}: {e}")
+    patterns = [
+        "airbyte-integrations/connectors/source-*/acceptance-test-config.yml",
+        "airbyte-integrations/connectors/source-*/acceptance-test-config.yaml",
+        "airbyte-integrations/connectors/destination-*/acceptance-test-config.yml",
+        "airbyte-integrations/connectors/destination-*/acceptance-test-config.yaml",
+    ]
+    for pattern in patterns:
+        for file_path in repo_path.glob(pattern):
+            try:
+                transform(file_path)
+            except AlreadyUpdatedError:
+                print(f"File {file_path} has already been updated, skipping transformation")
+            except yaml.YAMLError as e:
+                print(f"Error parsing YAML file {file_path}: {e}", file=sys.stderr)
+            except Exception as e:
+                print(f"Error transforming {file_path}: {e}", file=sys.stderr)

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In bin/fix_acceptance_tests_yml.py around lines 66 to 75, broaden the file
search to include destination-* connectors and files ending with both .yml and
.yaml, and route error output to stderr; change the glob loop to iterate over a
list of patterns (e.g. for patterns like
'airbyte-integrations/connectors/source-*/acceptance-test-config.yml',
'.../source-*/acceptance-test-config.yaml',
'.../destination-*/acceptance-test-config.yml',
'.../destination-*/acceptance-test-config.yaml') or generate matches with
multiple glob calls, then for each matched file call transform(file_path) and on
exceptions print the same messages to stderr using print(..., file=sys.stderr)
(including AlreadyUpdatedError, yaml.YAMLError as e, and generic Exception as
e).


# Run the migration only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Loading