1
+ """Adapter for interacting with the OmniParser server.
2
+
3
+ This module provides a client for the OmniParser API deployed on AWS.
4
+ """
5
+
6
+ import base64
7
+ import io
8
+ from typing import Dict , List , Any , Optional
9
+
10
+ import requests
11
+ from PIL import Image
12
+
13
+ from openadapt .custom_logger import logger
14
+
15
+
16
class OmniParserClient:
    """HTTP client for an OmniParser server.

    Two endpoints are used, both relative to ``server_url``:

    * ``GET  /probe/`` -- health check.
    * ``POST /parse/`` -- parse an image sent as JSON
      ``{"base64_image": <base64-encoded PNG>}``.

    Failures while talking to the server are reported as a dict of the
    form ``{"error": <message>, "parsed_content_list": []}`` rather than
    raised.
    """

    def __init__(
        self,
        server_url: str,
        *,
        probe_timeout: float = 5,
        parse_timeout: float = 30,
    ):
        """Initialize the OmniParser client.

        Args:
            server_url: Base URL of the OmniParser server. Trailing slashes
                are stripped so endpoint paths can be appended directly.
            probe_timeout: Timeout in seconds for the ``/probe/`` request.
            parse_timeout: Timeout in seconds for the ``/parse/`` request.
        """
        self.server_url = server_url.rstrip("/")
        self.probe_timeout = probe_timeout
        self.parse_timeout = parse_timeout

    def check_server_available(self) -> bool:
        """Check if the OmniParser server is available.

        Returns:
            bool: True if ``GET /probe/`` succeeds with a non-error status,
            False on any ``requests`` exception (connection failure, timeout,
            HTTP error status).
        """
        probe_url = f"{self.server_url}/probe/"
        try:
            response = requests.get(probe_url, timeout=self.probe_timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            logger.error(f"OmniParser server not available: {e}")
            return False

        logger.info("OmniParser server is available")
        return True

    def image_to_base64(self, image: Image.Image) -> str:
        """Convert a PIL Image to a base64 string.

        The image is serialized as PNG into an in-memory buffer before
        encoding.

        Args:
            image: PIL Image to convert.

        Returns:
            str: Base64 text of the PNG-encoded image.
        """
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        return base64.b64encode(buffer.getvalue()).decode("utf-8")

    def parse_image(self, image: Image.Image) -> Dict[str, Any]:
        """Parse an image using the OmniParser service.

        The server is probed first; if it is unreachable no parse request is
        sent. Exceptions raised while encoding the image are not caught here
        and propagate to the caller.

        Args:
            image: PIL Image to parse.

        Returns:
            Dict[str, Any]: The JSON object returned by the server on
            success. On failure, ``{"error": <message>,
            "parsed_content_list": []}``.
        """
        if not self.check_server_available():
            return {"error": "Server not available", "parsed_content_list": []}

        base64_image = self.image_to_base64(image)
        url = f"{self.server_url}/parse/"
        payload = {"base64_image": base64_image}

        try:
            response = requests.post(url, json=payload, timeout=self.parse_timeout)
            response.raise_for_status()
            result = response.json()
        except requests.exceptions.RequestException as e:
            logger.error(f"Error making request to OmniParser API: {e}")
            return {"error": str(e), "parsed_content_list": []}
        except Exception as e:
            # Boundary handler: anything else (e.g. a body that is not valid
            # JSON on older requests versions) is reported, not raised.
            logger.error(f"Error parsing image with OmniParser: {e}")
            return {"error": str(e), "parsed_content_list": []}

        if not isinstance(result, dict):
            # Callers index the result by key, so a non-object body is an error.
            message = (
                "Unexpected response type from OmniParser: "
                f"{type(result).__name__}"
            )
            logger.error(f"Error parsing image with OmniParser: {message}")
            return {"error": message, "parsed_content_list": []}

        # Latency is informational only; a missing or non-numeric value must
        # not turn a successful parse into an error.
        latency = result.get("latency", 0)
        if isinstance(latency, (int, float)):
            logger.info(f"OmniParser latency: {latency:.2f} seconds")
        return result
90
+
91
+
92
class OmniParserProvider:
    """Provider for OmniParser services.

    Wraps an ``OmniParserClient`` bound to a single server URL and adds
    status reporting, best-effort deployment, and parsing of raw image
    bytes.
    """

    # Used when no server URL is supplied to the constructor.
    DEFAULT_SERVER_URL = "http://localhost:8000"

    def __init__(self, server_url: Optional[str] = None):
        """Initialize OmniParser provider.

        Args:
            server_url: URL of the OmniParser server. When omitted (or
                empty), ``DEFAULT_SERVER_URL`` is used.
        """
        self.server_url = server_url or self.DEFAULT_SERVER_URL
        self.client = OmniParserClient(self.server_url)

    def is_available(self) -> bool:
        """Check if the OmniParser service is available.

        Returns:
            bool: Result of the client's server availability check.
        """
        return self.client.check_server_available()

    def status(self) -> Dict[str, Any]:
        """Check the status of the OmniParser service.

        Returns:
            Dict[str, Any]: ``{"services": [...], "is_available": bool}``
            where ``services`` holds one entry with the keys ``name``,
            ``status`` (``"running"`` or ``"stopped"``) and ``url``.
        """
        is_available = self.is_available()
        service = {
            "name": "omniparser",
            "status": "running" if is_available else "stopped",
            "url": self.server_url,
        }
        return {"services": [service], "is_available": is_available}

    def deploy(self) -> bool:
        """Deploy the OmniParser service if not already running.

        Returns:
            bool: True if the service was already available, or is available
            after ``Deploy.start()`` returns; False if it is still
            unavailable or if importing/running the deployment raised.
        """
        if self.status()["is_available"]:
            logger.info("OmniParser service is already running")
            return True

        try:
            # Imported lazily so the deployment package is only required
            # when a deployment is actually attempted.
            from deploy.deploy.models.omniparser.deploy import Deploy

            logger.info("Deploying OmniParser service...")
            Deploy.start()
            # NOTE(review): availability is checked as soon as start()
            # returns; whether start() waits for the server to come up is
            # not visible from this module -- confirm.
            return self.status()["is_available"]
        except Exception as e:
            logger.error(f"Failed to deploy OmniParser service: {e}")
            return False

    def parse_screenshot(self, image_data: bytes) -> Dict[str, Any]:
        """Parse a screenshot using OmniParser.

        Args:
            image_data: Raw encoded image data in bytes.

        Returns:
            Dict[str, Any]: Whatever the client's ``parse_image`` returns, or
            ``{"error": <message>, "parsed_content_list": []}`` if the bytes
            cannot be opened as an image or any other exception occurs.
        """
        try:
            # Context manager releases the image's resources once parsing
            # has finished.
            with Image.open(io.BytesIO(image_data)) as image:
                return self.client.parse_image(image)
        except Exception as e:
            logger.error(f"Error processing image data: {e}")
            return {"error": str(e), "parsed_content_list": []}
0 commit comments