diff --git a/docs/content/docs/computer-sdk/meta.json b/docs/content/docs/computer-sdk/meta.json index 92e146123..0bdf75989 100644 --- a/docs/content/docs/computer-sdk/meta.json +++ b/docs/content/docs/computer-sdk/meta.json @@ -5,6 +5,7 @@ "computers", "commands", "computer-ui", + "tracing-api", "sandboxed-python" ] } diff --git a/docs/content/docs/computer-sdk/tracing-api.mdx b/docs/content/docs/computer-sdk/tracing-api.mdx new file mode 100644 index 000000000..29c1410b6 --- /dev/null +++ b/docs/content/docs/computer-sdk/tracing-api.mdx @@ -0,0 +1,349 @@ +--- +title: Computer Tracing API +description: Record computer interactions for debugging, training, and analysis +--- + +# Computer Tracing API + +The Computer tracing API provides a powerful way to record computer interactions for debugging, training, analysis, and compliance purposes. Inspired by Playwright's tracing functionality, it offers flexible recording options and standardized output formats. + + +The tracing API addresses GitHub issue #299 by providing a unified recording interface that works with any Computer usage pattern, not just ComputerAgent. 
+ + +## Overview + +The tracing API allows you to: + +- Record screenshots at key moments +- Log all API calls and their results +- Capture accessibility tree snapshots +- Add custom metadata +- Export recordings in standardized formats +- Support both automated and human-in-the-loop workflows + +## Basic Usage + +### Starting and Stopping Traces + +```python +from computer import Computer + +computer = Computer(os_type="macos") +await computer.run() + +# Start tracing with default options +await computer.tracing.start() + +# Perform some operations +await computer.interface.left_click(100, 200) +await computer.interface.type_text("Hello, World!") +await computer.interface.press_key("enter") + +# Stop tracing and save +trace_path = await computer.tracing.stop() +print(f"Trace saved to: {trace_path}") +``` + +### Custom Configuration + +```python +# Start tracing with custom configuration +await computer.tracing.start({ + 'video': False, # Record video frames (future feature; default: False) + 'screenshots': True, # Record screenshots (default: True) + 'api_calls': True, # Record API calls (default: True) + 'accessibility_tree': True, # Record accessibility snapshots (default: False) + 'metadata': True, # Allow custom metadata (default: True) + 'name': 'my_custom_trace', # Custom trace name + 'path': './my_traces' # Custom output directory +}) + +# Add custom metadata during tracing +await computer.tracing.add_metadata('user_id', 'user123') +await computer.tracing.add_metadata('test_case', 'login_flow') + +# Stop with custom options +trace_path = await computer.tracing.stop({ + 'path': './exports/trace.zip', + 'format': 'zip' # 'zip' or 'dir' +}) +``` + +## Configuration Options + +### Start Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `video` | bool | `False` | Record video frames (future feature) | +| `screenshots` | bool | `True` | Capture screenshots after key actions | +| `api_calls` | bool | `True` | Log all interface method calls | +| 
`accessibility_tree` | bool | `False` | Record accessibility tree snapshots | +| `metadata` | bool | `True` | Enable custom metadata recording | +| `name` | str | auto-generated | Custom name for the trace | +| `path` | str | auto-generated | Custom directory for trace files | + +### Stop Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `path` | str | auto-generated | Custom output path for final trace | +| `format` | str | `'zip'` | Output format: `'zip'` or `'dir'` | + +## Use Cases + +### Custom Agent Development + +```python +from computer import Computer + +async def test_custom_agent(): + computer = Computer(os_type="linux") + await computer.run() + + # Start tracing for this test session + await computer.tracing.start({ + 'name': 'custom_agent_test', + 'screenshots': True, + 'accessibility_tree': True + }) + + # Your custom agent logic here + screenshot = await computer.interface.screenshot() + await computer.interface.left_click(500, 300) + await computer.interface.type_text("test input") + + # Add context about what the agent is doing + await computer.tracing.add_metadata('action', 'filling_form') + await computer.tracing.add_metadata('confidence', 0.95) + + # Save the trace + trace_path = await computer.tracing.stop() + return trace_path +``` + +### Training Data Collection + +```python +async def collect_training_data(): + computer = Computer(os_type="macos") + await computer.run() + + tasks = [ + "open_browser_and_search", + "create_document", + "send_email" + ] + + for task in tasks: + # Start a new trace for each task + await computer.tracing.start({ + 'name': f'training_{task}', + 'screenshots': True, + 'accessibility_tree': True, + 'metadata': True + }) + + # Add task metadata + await computer.tracing.add_metadata('task_type', task) + await computer.tracing.add_metadata('difficulty', 'beginner') + + # Perform the task (automated or human-guided) + await perform_task(computer, task) + + # Save this 
training example + await computer.tracing.stop({ + 'path': f'./training_data/{task}.zip' + }) +``` + +### Human-in-the-Loop Recording + +```python +async def record_human_demonstration(): + computer = Computer(os_type="windows") + await computer.run() + + # Start recording human demonstration + await computer.tracing.start({ + 'name': 'human_demo_excel_workflow', + 'screenshots': True, + 'api_calls': True, # Will capture any programmatic actions + 'metadata': True + }) + + print("Trace recording started. Perform your demonstration...") + print("The system will record all computer interactions.") + + # Add metadata about the demonstration + await computer.tracing.add_metadata('demonstrator', 'expert_user') + await computer.tracing.add_metadata('workflow', 'excel_data_analysis') + + # Human performs actions manually or through other tools + # Tracing will still capture any programmatic interactions + + input("Press Enter when demonstration is complete...") + + # Stop and save the demonstration + trace_path = await computer.tracing.stop() + print(f"Human demonstration saved to: {trace_path}") +``` + +### RPA Debugging + +```python +async def debug_rpa_workflow(): + computer = Computer(os_type="linux") + await computer.run() + + # Start tracing with full debugging info + await computer.tracing.start({ + 'name': 'rpa_debug_session', + 'screenshots': True, + 'accessibility_tree': True, + 'api_calls': True + }) + + try: + # Your RPA workflow + await rpa_login_sequence(computer) + await rpa_data_entry(computer) + await rpa_generate_report(computer) + + await computer.tracing.add_metadata('status', 'success') + + except Exception as e: + # Record the error in the trace + await computer.tracing.add_metadata('error', str(e)) + await computer.tracing.add_metadata('status', 'failed') + raise + finally: + # Always save the debug trace + trace_path = await computer.tracing.stop() + print(f"Debug trace saved to: {trace_path}") +``` + +## Output Format + +### Directory Structure + 
+When `stop()` is called with `'format': 'dir'`, traces are saved with this structure: + +``` +trace_20240922_143052_abc123/ +├── trace_metadata.json # Overall trace information +├── event_000001_trace_start.json +├── event_000002_api_call.json +├── event_000003_api_call.json +├── 000001_initial_screenshot.png +├── 000002_after_left_click.png +├── 000003_after_type_text.png +└── event_000004_trace_end.json +``` + +### Metadata Format + +The `trace_metadata.json` contains: + +```json +{ + "trace_id": "trace_20240922_143052_abc123", + "config": { + "screenshots": true, + "api_calls": true, + "accessibility_tree": false, + "metadata": true + }, + "start_time": 1695392252.123, + "end_time": 1695392267.456, + "duration": 15.333, + "total_events": 12, + "screenshot_count": 5, + "events": [...] // All events in chronological order +} +``` + +### Event Format + +Individual events follow this structure: + +```json +{ + "type": "api_call", + "timestamp": 1695392255.789, + "relative_time": 3.666, + "data": { + "method": "left_click", + "args": {"x": 100, "y": 200, "delay": null}, + "result": null, + "error": null, + "screenshot": "000002_after_left_click.png", + "success": true + } +} +``` + +## Integration with ComputerAgent + +The tracing API works seamlessly with existing ComputerAgent workflows: + +```python +from agent import ComputerAgent +from computer import Computer + +# Create computer and start tracing +computer = Computer(os_type="macos") +await computer.run() + +await computer.tracing.start({ + 'name': 'agent_with_tracing', + 'screenshots': True, + 'metadata': True +}) + +# Create agent using the same computer +agent = ComputerAgent( + model="openai/computer-use-preview", + tools=[computer] +) + +# Agent operations will be automatically traced +async for _ in agent.run("open trycua.com and navigate to docs"): + pass + +# Save the combined trace +trace_path = await computer.tracing.stop() +``` + +## Privacy Considerations + +The tracing API is designed with privacy in mind: + +- 
Clipboard content is not recorded (only content length) +- Screenshots can be disabled +- Sensitive text input can be filtered +- Custom metadata allows you to control what information is recorded + +## Comparison with ComputerAgent Trajectories + +| Feature | ComputerAgent Trajectories | Computer.tracing | +|---------|---------------------------|------------------| +| **Scope** | ComputerAgent only | Any Computer usage | +| **Flexibility** | Fixed format | Configurable options | +| **Custom Agents** | Not supported | Fully supported | +| **Human-in-the-loop** | Limited | Full support | +| **Real-time Control** | No | Start/stop anytime | +| **Output Format** | Agent-specific | Standardized | +| **Accessibility Data** | No | Optional | + +## Best Practices + +1. **Start tracing early**: Begin recording before your main workflow to capture the complete session +2. **Use meaningful names**: Provide descriptive trace names for easier organization +3. **Add contextual metadata**: Include information about what you're testing or demonstrating +4. **Handle errors gracefully**: Always stop tracing in a finally block +5. **Choose appropriate options**: Only record what you need to minimize overhead +6. **Organize output**: Use custom paths to organize traces by project or use case + +The Computer tracing API provides a powerful foundation for recording, analyzing, and improving computer automation workflows across all use cases. \ No newline at end of file diff --git a/examples/tracing_examples.py b/examples/tracing_examples.py new file mode 100644 index 000000000..f6a3efcdd --- /dev/null +++ b/examples/tracing_examples.py @@ -0,0 +1,369 @@ +""" +Examples demonstrating the Computer.tracing API for recording sessions. + +This module shows various use cases for the new Computer.tracing functionality, +including training data collection, debugging, and compliance recording. 
+""" + +import asyncio +import logging +from pathlib import Path +from computer import Computer +from agent import ComputerAgent + + +async def basic_tracing_example(): + """ + Basic example showing how to use Computer.tracing for recording a simple session. + """ + print("=== Basic Tracing Example ===") + + # Initialize computer + computer = Computer(os_type="macos", provider_type="lume") + await computer.run() + + try: + # Start tracing with basic configuration + await computer.tracing.start({ + 'screenshots': True, + 'api_calls': True, + 'metadata': True, + 'name': 'basic_session' + }) + + print("Tracing started...") + + # Perform some computer operations + await computer.interface.move_cursor(100, 100) + await computer.interface.left_click() + await computer.interface.type_text("Hello, tracing!") + await computer.interface.press_key("enter") + + # Add custom metadata + await computer.tracing.add_metadata("session_type", "basic_demo") + await computer.tracing.add_metadata("user_notes", "Testing basic functionality") + + # Stop tracing and save + trace_path = await computer.tracing.stop({'format': 'zip'}) + print(f"Trace saved to: {trace_path}") + + finally: + await computer.stop() + + +async def agent_tracing_example(): + """ + Example showing how to use tracing with ComputerAgent for enhanced session recording. 
+ """ + print("=== Agent with Tracing Example ===") + + # Initialize computer and agent + computer = Computer(os_type="macos", provider_type="lume") + await computer.run() + + try: + # Start comprehensive tracing + await computer.tracing.start({ + 'screenshots': True, + 'api_calls': True, + 'accessibility_tree': True, # Include accessibility data for training + 'metadata': True, + 'name': 'agent_session' + }) + + # Create agent + agent = ComputerAgent( + model="openai/computer-use-preview", + tools=[computer], + verbosity=logging.INFO + ) + + # Add metadata about the agent session + await computer.tracing.add_metadata("agent_model", "openai/computer-use-preview") + await computer.tracing.add_metadata("task_type", "web_search") + + # Run agent task + async for message in agent.run("Open a web browser and search for 'computer use automation'"): + print(f"Agent: {message}") + + # Stop tracing + trace_path = await computer.tracing.stop({'format': 'zip'}) + print(f"Agent trace saved to: {trace_path}") + + finally: + await computer.stop() + + +async def custom_agent_tracing_example(): + """ + Example showing tracing with custom agent implementations. 
+ """ + print("=== Custom Agent Tracing Example ===") + + computer = Computer(os_type="macos", provider_type="lume") + await computer.run() + + try: + # Start tracing with custom path + trace_dir = Path.cwd() / "custom_traces" / "my_agent_session" + await computer.tracing.start({ + 'screenshots': True, + 'api_calls': True, + 'accessibility_tree': False, + 'metadata': True, + 'path': str(trace_dir) + }) + + # Custom agent logic using direct computer calls + await computer.tracing.add_metadata("session_type", "custom_agent") + await computer.tracing.add_metadata("purpose", "RPA_workflow") + + # Take initial screenshot + screenshot = await computer.interface.screenshot() + + # Simulate RPA workflow + await computer.interface.move_cursor(500, 300) + await computer.interface.left_click() + await computer.interface.type_text("automation workflow test") + + # Add workflow checkpoint + await computer.tracing.add_metadata("checkpoint", "text_input_complete") + + await computer.interface.hotkey("command", "a") # Select all + await computer.interface.hotkey("command", "c") # Copy + + # Stop tracing and save as directory + trace_path = await computer.tracing.stop({'format': 'dir'}) + print(f"Custom agent trace saved to: {trace_path}") + + finally: + await computer.stop() + + +async def training_data_collection_example(): + """ + Example for collecting training data with rich context. 
+ """ + print("=== Training Data Collection Example ===") + + computer = Computer(os_type="macos", provider_type="lume") + await computer.run() + + try: + # Start tracing optimized for training data + await computer.tracing.start({ + 'screenshots': True, # Essential for visual training + 'api_calls': True, # Capture action sequences + 'accessibility_tree': True, # Rich semantic context + 'metadata': True, # Custom annotations + 'name': 'training_session' + }) + + # Add training metadata + await computer.tracing.add_metadata("data_type", "training") + await computer.tracing.add_metadata("task_category", "ui_automation") + await computer.tracing.add_metadata("difficulty", "intermediate") + await computer.tracing.add_metadata("annotator", "human_expert") + + # Simulate human demonstration + await computer.interface.screenshot() # Baseline screenshot + + # Step 1: Navigate to application + await computer.tracing.add_metadata("step", "1_navigate_to_app") + await computer.interface.move_cursor(100, 50) + await computer.interface.left_click() + + # Step 2: Input data + await computer.tracing.add_metadata("step", "2_input_data") + await computer.interface.type_text("training example data") + + # Step 3: Process + await computer.tracing.add_metadata("step", "3_process") + await computer.interface.press_key("tab") + await computer.interface.press_key("enter") + + # Final metadata + await computer.tracing.add_metadata("success", True) + await computer.tracing.add_metadata("completion_time", "45_seconds") + + trace_path = await computer.tracing.stop() + print(f"Training data collected: {trace_path}") + + finally: + await computer.stop() + + +async def debugging_session_example(): + """ + Example for debugging agent behavior with detailed tracing. 
+ """ + print("=== Debugging Session Example ===") + + computer = Computer(os_type="macos", provider_type="lume") + await computer.run() + + try: + # Start tracing for debugging + await computer.tracing.start({ + 'screenshots': True, + 'api_calls': True, + 'accessibility_tree': True, + 'metadata': True, + 'name': 'debug_session' + }) + + # Debug metadata + await computer.tracing.add_metadata("session_type", "debugging") + await computer.tracing.add_metadata("issue", "click_target_detection") + await computer.tracing.add_metadata("expected_behavior", "click_on_button") + + try: + # Problematic sequence that needs debugging + await computer.interface.move_cursor(200, 150) + await computer.interface.left_click() + + # This might fail - let's trace it + await computer.interface.type_text("debug test") + await computer.tracing.add_metadata("action_result", "successful_typing") + + except Exception as e: + # Record the error in tracing + await computer.tracing.add_metadata("error_encountered", str(e)) + await computer.tracing.add_metadata("error_type", type(e).__name__) + print(f"Error occurred: {e}") + + # Stop tracing + trace_path = await computer.tracing.stop() + print(f"Debug trace saved: {trace_path}") + print("Use this trace to analyze the failure and improve the agent") + + finally: + await computer.stop() + + +async def human_in_the_loop_example(): + """ + Example for recording mixed human/agent sessions. 
+ """ + print("=== Human-in-the-Loop Example ===") + + computer = Computer(os_type="macos", provider_type="lume") + await computer.run() + + try: + # Start tracing for hybrid session + await computer.tracing.start({ + 'screenshots': True, + 'api_calls': True, + 'metadata': True, + 'name': 'human_agent_collaboration' + }) + + # Initial agent phase + await computer.tracing.add_metadata("phase", "agent_autonomous") + await computer.tracing.add_metadata("agent_model", "computer-use-preview") + + # Agent performs initial task + await computer.interface.move_cursor(300, 200) + await computer.interface.left_click() + await computer.interface.type_text("automated input") + + # Transition to human intervention + await computer.tracing.add_metadata("phase", "human_intervention") + await computer.tracing.add_metadata("intervention_reason", "complex_ui_element") + + print("Human intervention phase - manual actions will be recorded...") + # At this point, human can take control while tracing continues + + # Simulate human input (in practice, this would be actual human interaction) + await computer.interface.move_cursor(500, 400) + await computer.interface.double_click() + await computer.tracing.add_metadata("human_action", "double_click_complex_element") + + # Back to agent + await computer.tracing.add_metadata("phase", "agent_completion") + await computer.interface.press_key("enter") + + trace_path = await computer.tracing.stop() + print(f"Human-agent collaboration trace saved: {trace_path}") + + finally: + await computer.stop() + + +async def performance_monitoring_example(): + """ + Example for performance monitoring and analysis. 
+ """ + print("=== Performance Monitoring Example ===") + + computer = Computer(os_type="macos", provider_type="lume") + await computer.run() + + try: + # Start tracing for performance analysis + await computer.tracing.start({ + 'screenshots': False, # Skip screenshots for performance + 'api_calls': True, + 'metadata': True, + 'name': 'performance_test' + }) + + # Performance test metadata + await computer.tracing.add_metadata("test_type", "performance_benchmark") + await computer.tracing.add_metadata("expected_duration", "< 30 seconds") + + import time + start_time = time.time() + + # Perform a series of rapid actions + for i in range(10): + await computer.tracing.add_metadata("iteration", i) + await computer.interface.move_cursor(100 + i * 50, 100) + await computer.interface.left_click() + await computer.interface.type_text(f"Test {i}") + await computer.interface.press_key("tab") + + end_time = time.time() + + # Record performance metrics + await computer.tracing.add_metadata("actual_duration", f"{end_time - start_time:.2f} seconds") + await computer.tracing.add_metadata("actions_per_second", f"{40 / (end_time - start_time):.2f}") + + trace_path = await computer.tracing.stop() + print(f"Performance trace saved: {trace_path}") + + finally: + await computer.stop() + + +async def main(): + """ + Run all tracing examples. 
+ """ + print("Computer.tracing API Examples") + print("=" * 50) + + examples = [ + basic_tracing_example, + agent_tracing_example, + custom_agent_tracing_example, + training_data_collection_example, + debugging_session_example, + human_in_the_loop_example, + performance_monitoring_example + ] + + for example in examples: + try: + await example() + print() + except Exception as e: + print(f"Error in {example.__name__}: {e}") + print() + + print("All examples completed!") + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/libs/python/computer/computer/computer.py b/libs/python/computer/computer/computer.py index 441e3ec3f..a6c17100c 100644 --- a/libs/python/computer/computer/computer.py +++ b/libs/python/computer/computer/computer.py @@ -2,6 +2,8 @@ import asyncio from .models import Computer as ComputerConfig, Display from .interface.factory import InterfaceFactory +from .tracing import ComputerTracing +from .tracing_wrapper import TracingInterfaceWrapper import time from PIL import Image import io @@ -197,7 +199,12 @@ def __init__( # Initialize with proper typing - None at first, will be set in run() self._interface = None + self._original_interface = None # Keep reference to original interface + self._tracing_wrapper = None # Tracing wrapper for interface self.use_host_computer_server = use_host_computer_server + + # Initialize tracing + self._tracing = ComputerTracing(self) # Record initialization in telemetry (if enabled) if telemetry_enabled and is_telemetry_enabled(): @@ -248,12 +255,14 @@ async def run(self) -> Optional[str]: # Create the interface with explicit type annotation from .interface.base import BaseComputerInterface - self._interface = cast( + interface = cast( BaseComputerInterface, InterfaceFactory.create_interface_for_os( os=self.os_type, ip_address=ip_address # type: ignore[arg-type] ), ) + self._interface = interface + self._original_interface = interface self.logger.info("Waiting for host computer 
server to be ready...") await self._interface.wait_for_ready() @@ -464,7 +473,7 @@ async def run(self) -> Optional[str]: # Pass authentication credentials if using cloud provider if self.provider_type == VMProviderType.CLOUD and self.api_key and self.config.name: - self._interface = cast( + interface = cast( BaseComputerInterface, InterfaceFactory.create_interface_for_os( os=self.os_type, @@ -474,13 +483,16 @@ async def run(self) -> Optional[str]: ), ) else: - self._interface = cast( + interface = cast( BaseComputerInterface, InterfaceFactory.create_interface_for_os( os=self.os_type, ip_address=ip_address ), ) + + self._interface = interface + self._original_interface = interface # Wait for the WebSocket interface to be ready self.logger.info("Connecting to WebSocket interface...") @@ -736,7 +748,7 @@ def interface(self): """Get the computer interface for interacting with the VM. Returns: - The computer interface + The computer interface (wrapped with tracing if tracing is active) """ if not hasattr(self, "_interface") or self._interface is None: error_msg = "Computer interface not initialized. Call run() first." @@ -746,8 +758,28 @@ def interface(self): ) raise RuntimeError(error_msg) + # Return tracing wrapper if tracing is active and we have an original interface + if (self._tracing.is_tracing and + hasattr(self, "_original_interface") and + self._original_interface is not None): + # Create wrapper if it doesn't exist or if the original interface changed + if (not hasattr(self, "_tracing_wrapper") or + self._tracing_wrapper is None or + self._tracing_wrapper._original_interface != self._original_interface): + self._tracing_wrapper = TracingInterfaceWrapper(self._original_interface, self._tracing) + return self._tracing_wrapper + return self._interface + @property + def tracing(self) -> ComputerTracing: + """Get the computer tracing instance for recording sessions. 
+ + Returns: + ComputerTracing: The tracing instance + """ + return self._tracing + @property def telemetry_enabled(self) -> bool: """Check if telemetry is enabled for this computer instance. diff --git a/libs/python/computer/computer/tracing.py b/libs/python/computer/computer/tracing.py new file mode 100644 index 000000000..2831cf8b4 --- /dev/null +++ b/libs/python/computer/computer/tracing.py @@ -0,0 +1,325 @@ +""" +Computer tracing functionality for recording sessions. + +This module provides a Computer.tracing API inspired by Playwright's tracing functionality, +allowing users to record computer interactions for debugging, training, and analysis. +""" + +import asyncio +import json +import time +import uuid +import zipfile +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional, Union +from PIL import Image +import io +import base64 + + +class ComputerTracing: + """ + Computer tracing class that records computer interactions and saves them to disk. + + This class provides a flexible API for recording computer sessions with configurable + options for what to record (screenshots, API calls, video, etc.). + """ + + def __init__(self, computer_instance): + """ + Initialize the tracing instance. + + Args: + computer_instance: The Computer instance to trace + """ + self._computer = computer_instance + self._is_tracing = False + self._trace_config: Dict[str, Any] = {} + self._trace_data: List[Dict[str, Any]] = [] + self._trace_start_time: Optional[float] = None + self._trace_id: Optional[str] = None + self._trace_dir: Optional[Path] = None + self._screenshot_count = 0 + + @property + def is_tracing(self) -> bool: + """Check if tracing is currently active.""" + return self._is_tracing + + async def start(self, config: Optional[Dict[str, Any]] = None) -> None: + """ + Start tracing with the specified configuration. 
+ + Args: + config: Tracing configuration dict with options: + - video: bool - Record video frames (default: False) + - screenshots: bool - Record screenshots (default: True) + - api_calls: bool - Record API calls and results (default: True) + - accessibility_tree: bool - Record accessibility tree snapshots (default: False) + - metadata: bool - Record custom metadata (default: True) + - name: str - Custom trace name (default: auto-generated) + - path: str - Custom trace directory path (default: auto-generated) + """ + if self._is_tracing: + raise RuntimeError("Tracing is already active. Call stop() first.") + + # Set default configuration + default_config = { + 'video': False, + 'screenshots': True, + 'api_calls': True, + 'accessibility_tree': False, + 'metadata': True, + 'name': None, + 'path': None + } + + self._trace_config = {**default_config, **(config or {})} + + # Generate trace ID and directory + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + self._trace_id = self._trace_config.get('name') or f"trace_{timestamp}_{str(uuid.uuid4())[:8]}" + + if self._trace_config.get('path'): + self._trace_dir = Path(self._trace_config['path']) + else: + self._trace_dir = Path.cwd() / "traces" / self._trace_id + + # Create trace directory + self._trace_dir.mkdir(parents=True, exist_ok=True) + + # Initialize trace data + self._trace_data = [] + self._trace_start_time = time.time() + self._screenshot_count = 0 + self._is_tracing = True + + # Record initial metadata + await self._record_event('trace_start', { + 'trace_id': self._trace_id, + 'config': self._trace_config, + 'timestamp': self._trace_start_time, + 'computer_info': { + 'os_type': self._computer.os_type, + 'provider_type': str(self._computer.provider_type), + 'image': self._computer.image + } + }) + + # Take initial screenshot if enabled + if self._trace_config.get('screenshots'): + await self._take_screenshot('initial_screenshot') + + async def stop(self, options: Optional[Dict[str, Any]] = None) -> str: + 
""" + Stop tracing and save the trace data. + + Args: + options: Stop options dict with: + - path: str - Custom output path for the trace archive + - format: str - Output format ('zip' or 'dir', default: 'zip') + + Returns: + str: Path to the saved trace file or directory + """ + if not self._is_tracing: + raise RuntimeError("Tracing is not active. Call start() first.") + + if self._trace_start_time is None or self._trace_dir is None or self._trace_id is None: + raise RuntimeError("Tracing state is invalid.") + + # Record final metadata + await self._record_event('trace_end', { + 'timestamp': time.time(), + 'duration': time.time() - self._trace_start_time, + 'total_events': len(self._trace_data), + 'screenshot_count': self._screenshot_count + }) + + # Take final screenshot if enabled + if self._trace_config.get('screenshots'): + await self._take_screenshot('final_screenshot') + + # Save trace metadata + metadata_path = self._trace_dir / "trace_metadata.json" + with open(metadata_path, 'w') as f: + json.dump({ + 'trace_id': self._trace_id, + 'config': self._trace_config, + 'start_time': self._trace_start_time, + 'end_time': time.time(), + 'duration': time.time() - self._trace_start_time, + 'total_events': len(self._trace_data), + 'screenshot_count': self._screenshot_count, + 'events': self._trace_data + }, f, indent=2, default=str) + + # Determine output format and path + output_format = options.get('format', 'zip') if options else 'zip' + custom_path = options.get('path') if options else None + + if output_format == 'zip': + # Create zip file + if custom_path: + zip_path = Path(custom_path) + else: + zip_path = self._trace_dir.parent / f"{self._trace_id}.zip" + + await self._create_zip_archive(zip_path) + output_path = str(zip_path) + else: + # Return directory path + if custom_path: + # Move directory to custom path + custom_dir = Path(custom_path) + if custom_dir.exists(): + import shutil + shutil.rmtree(custom_dir) + self._trace_dir.rename(custom_dir) + 
async def _record_event(self, event_type: str, data: Dict[str, Any]) -> None:
    """
    Record a trace event.

    The event is appended to the in-memory event list and is also written to
    its own JSON file inside the trace directory. Nothing is recorded when
    tracing is inactive or the start time / trace directory is unset.

    Args:
        event_type: Type of event (e.g., 'click', 'type', 'screenshot')
        data: Event data
    """
    if not self._is_tracing or self._trace_start_time is None or self._trace_dir is None:
        return

    # Read the clock once so 'timestamp' and 'relative_time' describe the same instant.
    now = time.time()
    event = {
        'type': event_type,
        'timestamp': now,
        'relative_time': now - self._trace_start_time,
        'data': data
    }

    self._trace_data.append(event)

    # Save event to individual file for large traces; the 1-based, zero-padded
    # sequence number keeps the file names in recording order.
    event_file = self._trace_dir / f"event_{len(self._trace_data):06d}_{event_type}.json"
    with open(event_file, 'w', encoding='utf-8') as f:
        # default=str: values json cannot encode are stored as their str() form.
        json.dump(event, f, indent=2, default=str)


async def _take_screenshot(self, name: str = 'screenshot') -> Optional[str]:
    """
    Take a screenshot and save it to the trace.

    Args:
        name: Name for the screenshot (used as the file name suffix)

    Returns:
        Optional[str]: Path to the saved screenshot, or None if screenshots are
        disabled, no interface / trace directory is available, or capturing
        or writing the image failed.
    """
    if not self._trace_config.get('screenshots') or not self._computer.interface or self._trace_dir is None:
        return None

    try:
        screenshot_bytes = await self._computer.interface.screenshot()

        # Commit the counter only after the file is written, so a failed write
        # does not inflate the screenshot count. There is no await between
        # choosing the index and committing it.
        index = self._screenshot_count + 1
        screenshot_path = self._trace_dir / f"{index:06d}_{name}.png"

        with open(screenshot_path, 'wb') as f:
            f.write(screenshot_bytes)

        self._screenshot_count = index
        return str(screenshot_path)
    except Exception as e:
        # Log error but don't fail the trace
        if hasattr(self._computer, 'logger'):
            self._computer.logger.warning(f"Failed to take screenshot: {e}")
        return None


async def _create_zip_archive(self, zip_path: Path) -> None:
    """
    Create a zip archive of the trace directory.

    Missing parent directories of the archive are created. Entries are stored
    relative to the trace directory and added in sorted path order.

    Args:
        zip_path: Path where to save the zip file

    Raises:
        RuntimeError: If the trace directory is not set
    """
    if self._trace_dir is None:
        raise RuntimeError("Trace directory is not set")

    archive = Path(zip_path)
    archive.parent.mkdir(parents=True, exist_ok=True)
    archive_resolved = archive.resolve()

    with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for file_path in sorted(self._trace_dir.rglob('*')):
            if not file_path.is_file():
                continue
            # The archive already exists on disk at this point; if it was
            # placed inside the trace directory it must not be added to itself.
            if file_path.resolve() == archive_resolved:
                continue
            zipf.write(file_path, file_path.relative_to(self._trace_dir))


async def record_api_call(self, method: str, args: Dict[str, Any], result: Any = None, error: Optional[Exception] = None) -> None:
    """
    Record an API call event.

    Does nothing when the 'api_calls' option is off. For click / typing /
    key actions a screenshot and an accessibility snapshot are captured first
    (each only if its own option is enabled), then the 'api_call' event is
    recorded.

    Args:
        method: The method name that was called
        args: Arguments passed to the method
        result: Result returned by the method (stored as its str() form)
        error: Exception raised by the method, if any
    """
    if not self._trace_config.get('api_calls'):
        return

    # Actions after which a screenshot / accessibility snapshot is captured.
    screenshot_actions = ('left_click', 'right_click', 'double_click', 'type_text', 'press_key', 'hotkey')
    follow_up = method in screenshot_actions

    # Take screenshot after certain actions if enabled
    screenshot_path = None
    if follow_up and self._trace_config.get('screenshots'):
        screenshot_path = await self._take_screenshot(f"after_{method}")

    # Record accessibility tree after certain actions if enabled
    if follow_up and self._trace_config.get('accessibility_tree'):
        await self.record_accessibility_tree()

    await self._record_event('api_call', {
        'method': method,
        'args': args,
        'result': str(result) if result is not None else None,
        'error': str(error) if error else None,
        'screenshot': screenshot_path,
        'success': error is None
    })


async def record_accessibility_tree(self) -> None:
    """
    Record the current accessibility tree if enabled.

    Failures while fetching or recording the tree are logged as a warning on
    the computer's logger (when it has one) and otherwise ignored, so they do
    not interrupt the trace.
    """
    if not self._trace_config.get('accessibility_tree') or not self._computer.interface:
        return

    try:
        accessibility_tree = await self._computer.interface.get_accessibility_tree()
        await self._record_event('accessibility_tree', {
            'tree': accessibility_tree
        })
    except Exception as e:
        if hasattr(self._computer, 'logger'):
            self._computer.logger.warning(f"Failed to record accessibility tree: {e}")
class TracingInterfaceWrapper:
    """
    Wrapper class that intercepts computer interface calls and records them for tracing.

    Each public method forwards to the method of the same name on the wrapped
    interface and then reports the call (arguments, a trace-safe result summary
    and any raised exception) to the tracing instance. Recording is
    best-effort: a failure while recording is logged and never replaces the
    outcome of the underlying interface call. Attributes not defined here are
    delegated to the wrapped interface untraced.
    """

    def __init__(self, original_interface: "BaseComputerInterface", tracing_instance):
        """
        Initialize the tracing wrapper.

        Args:
            original_interface: The original computer interface
            tracing_instance: The ComputerTracing instance
        """
        self._original_interface = original_interface
        self._tracing = tracing_instance

    def __getattr__(self, name):
        """
        Delegate attribute access to the original interface if not found in wrapper.

        Raises:
            AttributeError: If the wrapper's own fields are not set yet, or the
                wrapped interface does not have the attribute.
        """
        # __getattr__ only runs after normal lookup failed. If one of the
        # wrapper's own fields is what is missing (instance created without
        # __init__, e.g. by copy or pickle), delegating would re-enter
        # __getattr__ without end.
        if name in ('_original_interface', '_tracing'):
            raise AttributeError(name)
        return getattr(self._original_interface, name)

    async def _record_call(self, method_name: str, args: Dict[str, Any], result: Any = None, error: Optional[Exception] = None):
        """
        Record an API call for tracing.

        Nothing is recorded while tracing is inactive. Exceptions raised by
        the tracing instance are logged and suppressed so that they cannot
        mask the result or the error of the interface call being traced.

        Args:
            method_name: Name of the method called
            args: Arguments passed to the method
            result: Result returned by the method
            error: Exception raised, if any
        """
        import logging  # local import: this module does not import logging at file level

        try:
            if self._tracing.is_tracing:
                await self._tracing.record_api_call(method_name, args, result, error)
        except Exception:
            logging.getLogger(__name__).warning(
                "Failed to record traced call to %s", method_name, exc_info=True
            )

    async def _traced(self, method_name: str, trace_args: Dict[str, Any], *call_args: Any, summarize: Any = None, **call_kwargs: Any) -> Any:
        """
        Call a method of the wrapped interface and record the call.

        Args:
            method_name: Name of the interface method to invoke and to record
            trace_args: Arguments as they should appear in the trace
            *call_args: Positional arguments forwarded to the interface method
            summarize: Optional callable turning the raw result into the value
                stored in the trace; when omitted no result is stored
            **call_kwargs: Keyword arguments forwarded to the interface method

        Returns:
            Whatever the interface method returned.

        Raises:
            Exception: Whatever the interface method raised, unchanged.
        """
        error = None
        result = None
        try:
            result = await getattr(self._original_interface, method_name)(*call_args, **call_kwargs)
            return result
        except Exception as exc:
            error = exc
            raise
        finally:
            # Runs on success and on failure; on failure result is still None.
            recorded = summarize(result) if summarize is not None else None
            await self._record_call(method_name, trace_args, recorded, error)

    # Mouse Actions
    async def left_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
        """Perform a left mouse button click."""
        return await self._traced('left_click', {'x': x, 'y': y, 'delay': delay}, x, y, delay)

    async def right_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
        """Perform a right mouse button click."""
        return await self._traced('right_click', {'x': x, 'y': y, 'delay': delay}, x, y, delay)

    async def double_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
        """Perform a double left mouse button click."""
        return await self._traced('double_click', {'x': x, 'y': y, 'delay': delay}, x, y, delay)

    async def move_cursor(self, x: int, y: int, delay: Optional[float] = None) -> None:
        """Move the cursor to the specified screen coordinates."""
        return await self._traced('move_cursor', {'x': x, 'y': y, 'delay': delay}, x, y, delay)

    async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
        """Drag from current position to specified coordinates."""
        args = {'x': x, 'y': y, 'button': button, 'duration': duration, 'delay': delay}
        return await self._traced('drag_to', args, x, y, button, duration, delay)

    async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
        """Drag the cursor along a path of coordinates."""
        args = {'path': path, 'button': button, 'duration': duration, 'delay': delay}
        return await self._traced('drag', args, path, button, duration, delay)

    # Keyboard Actions
    async def key_down(self, key: str, delay: Optional[float] = None) -> None:
        """Press and hold a key."""
        return await self._traced('key_down', {'key': key, 'delay': delay}, key, delay)

    async def key_up(self, key: str, delay: Optional[float] = None) -> None:
        """Release a previously pressed key."""
        return await self._traced('key_up', {'key': key, 'delay': delay}, key, delay)

    async def type_text(self, text: str, delay: Optional[float] = None) -> None:
        """Type the specified text string."""
        return await self._traced('type_text', {'text': text, 'delay': delay}, text, delay)

    async def press_key(self, key: str, delay: Optional[float] = None) -> None:
        """Press and release a single key."""
        return await self._traced('press_key', {'key': key, 'delay': delay}, key, delay)

    async def hotkey(self, *keys: str, delay: Optional[float] = None) -> None:
        """Press multiple keys simultaneously (keyboard shortcut)."""
        return await self._traced('hotkey', {'keys': keys, 'delay': delay}, *keys, delay=delay)

    # Scrolling Actions
    async def scroll(self, x: int, y: int, delay: Optional[float] = None) -> None:
        """Scroll the mouse wheel by specified amounts."""
        return await self._traced('scroll', {'x': x, 'y': y, 'delay': delay}, x, y, delay)

    async def scroll_down(self, clicks: int = 1, delay: Optional[float] = None) -> None:
        """Scroll down by the specified number of clicks."""
        return await self._traced('scroll_down', {'clicks': clicks, 'delay': delay}, clicks, delay)

    async def scroll_up(self, clicks: int = 1, delay: Optional[float] = None) -> None:
        """Scroll up by the specified number of clicks."""
        return await self._traced('scroll_up', {'clicks': clicks, 'delay': delay}, clicks, delay)

    # Screen Actions
    async def screenshot(self) -> bytes:
        """Take a screenshot."""
        # For screenshots, we don't want to include the raw bytes in the trace
        return await self._traced(
            'screenshot', {},
            summarize=lambda image: 'screenshot_taken' if image else None,
        )

    async def get_screen_size(self) -> Dict[str, int]:
        """Get the screen dimensions."""
        return await self._traced('get_screen_size', {}, summarize=lambda size: size)

    async def get_cursor_position(self) -> Dict[str, int]:
        """Get the current cursor position on screen."""
        return await self._traced('get_cursor_position', {}, summarize=lambda position: position)

    # Clipboard Actions
    async def copy_to_clipboard(self) -> str:
        """Get the current clipboard content."""
        # Don't include clipboard content in trace for privacy; record its length only
        return await self._traced(
            'copy_to_clipboard', {},
            summarize=lambda content: f'content_length_{len(content)}' if content else None,
        )

    async def set_clipboard(self, text: str) -> None:
        """Set the clipboard content to the specified text."""
        # Don't include clipboard content in trace for privacy; record its length only
        return await self._traced('set_clipboard', {'text_length': len(text)}, text)
@pytest.mark.asyncio
async def test_tracing_api_call_recording():
    """Test API call recording functionality."""
    computer = MockComputer()
    tracing = ComputerTracing(computer)

    with tempfile.TemporaryDirectory() as temp_dir:
        await tracing.start({
            'api_calls': True,
            'screenshots': False,
            'path': temp_dir
        })

        # Record an API call
        await tracing.record_api_call(
            'left_click',
            {'x': 100, 'y': 200},
            result=None,
            error=None
        )

        # Record another API call with error
        test_error = Exception("Test error")
        await tracing.record_api_call(
            'type_text',
            {'text': 'test'},
            result=None,
            error=test_error
        )

        trace_path = await tracing.stop({'format': 'dir'})

        # Verify event files were created. glob() yields entries in arbitrary
        # order; sorting by the zero-padded sequence number in the file name
        # restores recording order, so index 0 really is the first call.
        trace_dir = Path(trace_path)
        event_files = sorted(trace_dir.glob("event_*_api_call.json"))
        assert len(event_files) >= 2

        # Verify event content
        with open(event_files[0]) as f:
            event = json.load(f)
            assert event['type'] == 'api_call'
            assert event['data']['method'] == 'left_click'
            assert event['data']['success'] is True


@pytest.mark.asyncio
async def test_tracing_metadata():
    """Test metadata recording functionality."""
    computer = MockComputer()
    tracing = ComputerTracing(computer)

    with tempfile.TemporaryDirectory() as temp_dir:
        await tracing.start({
            'metadata': True,
            'path': temp_dir
        })

        # Add custom metadata
        await tracing.add_metadata('test_key', 'test_value')
        await tracing.add_metadata('numeric_key', 42)
        await tracing.add_metadata('complex_key', {'nested': 'data'})

        trace_path = await tracing.stop({'format': 'dir'})

        # Verify metadata event files
        trace_dir = Path(trace_path)
        metadata_files = list(trace_dir.glob("event_*_metadata.json"))
        assert len(metadata_files) >= 3


@pytest.mark.asyncio
async def test_tracing_screenshots():
    """Test screenshot recording functionality."""
    computer = MockComputer()
    tracing = ComputerTracing(computer)

    with tempfile.TemporaryDirectory() as temp_dir:
        await tracing.start({
            'screenshots': True,
            'path': temp_dir
        })

        # Take a screenshot manually
        await tracing._take_screenshot('manual_test')

        trace_path = await tracing.stop({'format': 'dir'})

        # Verify screenshot files: at least the manual one plus the final one
        # taken by stop(). NOTE(review): start() may add an initial screenshot
        # as well - confirm before tightening this bound.
        trace_dir = Path(trace_path)
        screenshot_files = list(trace_dir.glob("*.png"))
        assert len(screenshot_files) >= 2


@pytest.mark.asyncio
async def test_tracing_config_options():
    """Test different configuration options."""
    computer = MockComputer()
    tracing = ComputerTracing(computer)

    # Test with minimal config
    with tempfile.TemporaryDirectory() as temp_dir:
        await tracing.start({
            'screenshots': False,
            'api_calls': False,
            'metadata': False,
            'path': temp_dir
        })

        await tracing.record_api_call('test_call', {})
        await tracing.add_metadata('test', 'value')

        trace_path = await tracing.stop({'format': 'dir'})

        # With everything disabled, should only have basic trace events
        trace_dir = Path(trace_path)
        event_files = list(trace_dir.glob("event_*.json"))
        # Should have trace_start and trace_end events only
        assert len(event_files) == 2


@pytest.mark.asyncio
async def test_tracing_zip_output():
    """Test zip file output format."""
    computer = MockComputer()
    tracing = ComputerTracing(computer)

    with tempfile.TemporaryDirectory() as temp_dir:
        await tracing.start({
            'screenshots': True,
            'api_calls': True,
            'path': temp_dir
        })

        await tracing.record_api_call('test_call', {'arg': 'value'})

        # Stop with zip format
        trace_path = await tracing.stop({'format': 'zip'})

        # Verify zip file exists
        assert Path(trace_path).exists()
        assert trace_path.endswith('.zip')


@pytest.mark.asyncio
async def test_tracing_accessibility_tree():
    """Test accessibility tree recording."""
    computer = MockComputer()
    tracing = ComputerTracing(computer)

    with tempfile.TemporaryDirectory() as temp_dir:
        await tracing.start({
            'accessibility_tree': True,
            'path': temp_dir
        })

        # Record accessibility tree
        await tracing.record_accessibility_tree()

        trace_path = await tracing.stop({'format': 'dir'})

        # Verify accessibility tree event (sorted: glob order is arbitrary)
        trace_dir = Path(trace_path)
        tree_files = sorted(trace_dir.glob("event_*_accessibility_tree.json"))
        assert len(tree_files) >= 1

        # Verify content
        with open(tree_files[0]) as f:
            event = json.load(f)
            assert event['type'] == 'accessibility_tree'
            assert 'tree' in event['data']


def test_tracing_errors():
    """Test error handling in tracing."""
    computer = MockComputer()
    tracing = ComputerTracing(computer)

    # Test stop without start
    with pytest.raises(RuntimeError, match="Tracing is not active"):
        asyncio.run(tracing.stop())

    # Test start when already started
    async def double_start():
        # Trace into a temporary directory like the other tests do, and always
        # stop the trace so a failed assertion does not leave it active.
        with tempfile.TemporaryDirectory() as temp_dir:
            await tracing.start({'path': temp_dir})
            try:
                with pytest.raises(RuntimeError, match="Tracing is already active"):
                    await tracing.start()
            finally:
                await tracing.stop()

    asyncio.run(double_start())


if __name__ == "__main__":
    # Run tests directly
    import sys

    async def run_tests():
        """Run all async tests manually and return the number of failures."""
        tests = [
            test_tracing_start_stop,
            test_tracing_api_call_recording,
            test_tracing_metadata,
            test_tracing_screenshots,
            test_tracing_config_options,
            test_tracing_zip_output,
            test_tracing_accessibility_tree
        ]

        print("Running Computer.tracing tests...")

        failures = 0
        for test in tests:
            try:
                await test()
                print(f"✓ {test.__name__}")
            except Exception as e:
                failures += 1
                print(f"✗ {test.__name__}: {e}")
        return failures

    failed = asyncio.run(run_tests())

    # Run sync tests. test_tracing_errors drives its own event loop with
    # asyncio.run(), which cannot be called while another loop is running,
    # so it has to run here and not inside run_tests().
    try:
        test_tracing_errors()
        print("✓ test_tracing_errors")
    except Exception as e:
        failed += 1
        print(f"✗ test_tracing_errors: {e}")

    print("Tests completed!")

    # Report failures through the exit status so scripts can detect them.
    sys.exit(1 if failed else 0)
at end of file