1
- from typing import Union , List , TYPE_CHECKING
2
- from playwright .sync_api import FrameLocator , ElementHandle , Error
1
+ from typing import Optional , Union , List , TYPE_CHECKING
2
+ from playwright .sync_api import FrameLocator , ElementHandle , Error , Frame
3
3
from bs4 import BeautifulSoup
4
4
from loguru import logger
5
+ from dendrite .sync_api ._api .response .get_element_response import GetElementResponse
5
6
from dendrite .sync_api ._core ._type_spec import PlaywrightPage
6
7
from dendrite .sync_api ._core .dendrite_element import Element
8
+ from dendrite .sync_api ._core .models .response import ElementsResponse
7
9
8
10
if TYPE_CHECKING :
9
11
from dendrite .sync_api ._core .dendrite_page import Page
10
12
from dendrite .sync_api ._core ._js import GENERATE_DENDRITE_IDS_IFRAME_SCRIPT
11
13
from dendrite .sync_api ._dom .util .mild_strip import mild_strip_in_place
12
14
13
15
14
- def expand_iframes (
15
- page : PlaywrightPage ,
16
- page_soup : BeautifulSoup ,
17
- iframe_path : str = "" ,
18
- frame : Union [ElementHandle , None ] = None ,
19
- ):
20
- if frame is None :
21
- iframes = page .query_selector_all ("iframe" )
22
- else :
23
- content_frame = frame .content_frame ()
24
- if not content_frame :
25
- return
26
- iframes = content_frame .query_selector_all ("iframe" )
27
- for iframe in iframes :
28
- iframe_id = iframe .get_attribute ("d-id" )
16
def expand_iframes(page: PlaywrightPage, page_soup: BeautifulSoup):
    """Inline the content of every tagged iframe of ``page`` into ``page_soup``.

    Iterates over ``page.frames`` and skips the main frame (the one without a
    parent). For each remaining frame whose ``<iframe>`` element, and every
    ancestor ``<iframe>`` element, carries a ``d-id`` attribute, this:

    1. runs ``GENERATE_DENDRITE_IDS_IFRAME_SCRIPT`` inside the frame, passing
       the ``|``-joined chain of ancestor ids as ``frame_path``;
    2. parses the frame's HTML, strips it with ``mild_strip_in_place``;
    3. hands the result to ``merge_iframe_to_page`` under the frame's own id.

    Args:
        page: Playwright page whose frames are walked.
        page_soup: Parsed document of the page; modified in place.

    Returns:
        None. Frames that raise a Playwright ``Error`` at any step are logged
        at debug level and skipped.
    """

    def get_iframe_path(frame: Frame) -> Optional[str]:
        """Return the ``|``-joined ``d-id`` chain from the outermost iframe to ``frame``.

        Returns ``None`` as soon as one iframe element in the chain has no
        ``d-id`` attribute.
        """
        ids_innermost_first: List[str] = []
        current_frame = frame
        while current_frame.parent_frame is not None:
            iframe_id = current_frame.frame_element().get_attribute("d-id")
            if iframe_id is None:
                return None
            ids_innermost_first.append(iframe_id)
            current_frame = current_frame.parent_frame
        # Collected child-first; the path is written outermost-first.
        return "|".join(reversed(ids_innermost_first))

    for frame in page.frames:
        if frame.parent_frame is None:
            continue
        iframe_id = None
        try:
            # frame_element() raises Error when the frame has been detached,
            # so the lookups live inside the try: one vanished iframe must not
            # abort the expansion of all the others.
            iframe_id = frame.frame_element().get_attribute("d-id")
            if iframe_id is None:
                continue
            iframe_path = get_iframe_path(frame)
            if iframe_path is None:
                continue
            frame.evaluate(
                GENERATE_DENDRITE_IDS_IFRAME_SCRIPT, {"frame_path": iframe_path}
            )
            frame_content = frame.content()
            frame_tree = BeautifulSoup(frame_content, "lxml")
            mild_strip_in_place(frame_tree)
            merge_iframe_to_page(iframe_id, page_soup, frame_tree)
        except Error as e:
            logger.debug(f"Error processing frame {iframe_id}: {e}")
            continue
50
51
51
52
@@ -57,11 +58,44 @@ def merge_iframe_to_page(iframe_id: str, page: BeautifulSoup, iframe: BeautifulS
57
58
iframe_element .replace_with (iframe )
58
59
59
60
60
- def get_frame_context (
61
- page : PlaywrightPage , iframe_path : str
62
- ) -> Union [FrameLocator , PlaywrightPage ]:
63
- iframe_path_list = iframe_path .split ("|" )
64
- frame_context = page
65
- for iframe_id in iframe_path_list :
66
- frame_context = frame_context .frame_locator (f"[tf623_id='{ iframe_id } ']" )
67
- return frame_context
61
def _get_all_elements_from_selector_soup(
    selector: str, soup: BeautifulSoup, page: "Page"
) -> List[Element]:
    """Build an ``Element`` for every node in ``soup`` matched by ``selector``.

    Each match is identified by its ``d-id`` attribute; matches without one
    are skipped. The locator is an XPath lookup on that ``d-id`` inside the
    context returned by ``page._get_context`` for the matched node.

    Args:
        selector: CSS selector passed to ``soup.select``.
        soup: Parsed document to search.
        page: Page supplying the locator context, ``dendrite_browser`` and
            ``_browser_api_client`` for the created elements.

    Returns:
        The created elements, in the order ``soup.select`` returned the nodes.
    """
    dendrite_elements: List[Element] = []
    for element in soup.select(selector):
        d_id = element.get("d-id", "")
        # The attribute value can come back as a list; reduce it to its first
        # entry *before* it is interpolated into the XPath below.
        if isinstance(d_id, list):
            d_id = d_id[0] if d_id else ""
        if not d_id:
            continue
        # Resolve context and locator only for elements that are kept.
        frame = page._get_context(element)
        locator = frame.locator(f"xpath=//*[@d-id='{d_id}']")
        dendrite_elements.append(
            Element(d_id, locator, page.dendrite_browser, page._browser_api_client)
        )
    return dendrite_elements
78
+
79
+
80
def get_elements_from_selectors_soup(
    page: "Page", soup: BeautifulSoup, res: GetElementResponse, only_one: bool
) -> Union[Optional[Element], List[Element], ElementsResponse]:
    """Resolve the selectors in ``res`` against ``soup`` into elements.

    * If ``res.selectors`` is a dict, each key's selectors are tried in order
      and the first element of the first selector that matches is stored
      under that key; keys with no match are omitted. The mapping is returned
      wrapped in ``ElementsResponse``.
    * If ``res.selectors`` is a list, the selectors are tried from last to
      first. The first selector with matches decides the result: its first
      element when ``only_one`` is true, otherwise all of its elements.
    * Otherwise, or when no list selector matches, ``None`` is returned.
    """
    if isinstance(res.selectors, dict):
        first_match_by_key = {}
        for name, candidates in res.selectors.items():
            for candidate in candidates:
                found = _get_all_elements_from_selector_soup(candidate, soup, page)
                if found:
                    first_match_by_key[name] = found[0]
                    break
        return ElementsResponse(first_match_by_key)

    if not isinstance(res.selectors, list):
        return None

    for candidate in reversed(res.selectors):
        found = _get_all_elements_from_selector_soup(candidate, soup, page)
        if not found:
            continue
        if only_one:
            return found[0]
        return found
    return None
0 commit comments