Skip to content

Commit 7ee7375

Browse files
authored
Merge pull request #384 from autoscrape-labs/fix/nested-oopif-iframe-resolution
Fix nested oopif iframe resolution
2 parents 08207bc + 8251e35 commit 7ee7375

11 files changed

Lines changed: 604 additions & 94 deletions

File tree

docs/en/deep-dive/fundamentals/iframes-and-contexts.md

Lines changed: 43 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -322,19 +322,19 @@ Manages browser targets (pages, iframes, workers, etc.).
322322
**Pydoll usage:**
323323

324324
```python
325-
# From pydoll/elements/web_element.py (simplified)
326-
async def _resolve_oopif_by_parent(self, parent_frame_id: str, ...):
327-
"""Resolve an OOPIF using the parent frame id."""
325+
# From pydoll/interactions/iframe.py (simplified)
326+
async def _resolve_oopif_by_parent(self, content_frame_id: str, ...):
327+
"""Resolve an OOPIF using the content frame id."""
328328
browser_handler = ConnectionHandler(...)
329329
targets_response: GetTargetsResponse = await browser_handler.execute_command(
330330
TargetCommands.get_targets()
331331
)
332332
target_infos = targets_response.get('result', {}).get('targetInfos', [])
333-
333+
334334
# Find targets whose parentFrameId matches
335335
direct_children = [
336336
target_info for target_info in target_infos
337-
if target_info.get('parentFrameId') == parent_frame_id
337+
if target_info.get('parentFrameId') == content_frame_id
338338
]
339339

340340
if direct_children:
@@ -617,12 +617,12 @@ sequenceDiagram
617617
**Code**:
618618

619619
```python
620-
# From pydoll/elements/web_element.py
621-
async def _ensure_iframe_context(self) -> None:
622-
"""Initialize and cache context information for iframe elements."""
623-
node_info = await self._describe_node(object_id=self._object_id)
620+
# From pydoll/interactions/iframe.py
621+
async def resolve(self) -> IFrameContext:
622+
"""Resolve and return iframe context."""
624623
base_handler, base_session_id = self._get_base_session()
625-
frame_id, document_url, parent_frame_id, backend_node_id = self._extract_frame_metadata(
624+
node_info = await self._describe_element_node(base_handler, base_session_id)
625+
frame_id, document_url, content_frame_id, backend_node_id = self._extract_frame_metadata(
626626
node_info
627627
)
628628
# ... continue resolution
@@ -637,7 +637,7 @@ def _extract_frame_metadata(
637637
) -> tuple[Optional[str], Optional[str], Optional[str], Optional[int]]:
638638
"""Extract iframe-related metadata from a DOM.describeNode Node."""
639639
content_document = node_info.get('contentDocument') or {}
640-
parent_frame_id = node_info.get('frameId')
640+
content_frame_id = node_info.get('frameId')
641641
backend_node_id = node_info.get('backendNodeId')
642642
frame_id = content_document.get('frameId')
643643
document_url = (
@@ -646,7 +646,7 @@ def _extract_frame_metadata(
646646
or node_info.get('documentURL')
647647
or node_info.get('baseURL')
648648
)
649-
return frame_id, document_url, parent_frame_id, backend_node_id
649+
return frame_id, document_url, content_frame_id, backend_node_id
650650
```
651651

652652
**Outcome**:
@@ -748,32 +748,41 @@ If no suitable direct child is found (or when `parentFrameId` information is inc
748748
1. Iterate all iframe/page targets.
749749
2. Attach to each and fetch its frame tree.
750750
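3. First, check for a direct match: if the target's root frame id equals our `content_frame_id`, use that target. Otherwise, try to match the **root frame owner** via `DOM.getFrameOwner(root_frame_id)` against our iframe's `backendNodeId`.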
751-
4. If that does not match, look for a **child frame** whose `parentId` equals our `parent_frame_id` (this covers cases where the OOPIF is nested under an intermediate frame).
751+
4. If that does not match, look for a **child frame** whose `parentId` equals our `content_frame_id` (this covers cases where the OOPIF is nested under an intermediate frame).
752752

753753
**Code**:
754754

755755
```python
756-
# From pydoll/elements/web_element.py
756+
# From pydoll/interactions/iframe.py
757757
async def _resolve_oopif_by_parent(
758758
self,
759-
parent_frame_id: str,
759+
content_frame_id: str,
760760
backend_node_id: Optional[int],
761+
base_handler: Optional[ConnectionHandler] = None,
762+
base_session_id: Optional[str] = None,
761763
) -> tuple[Optional[ConnectionHandler], Optional[str], Optional[str], Optional[str]]:
762-
"""Resolve an OOPIF using the given parent frame id."""
764+
"""Resolve an OOPIF using the content frame id."""
763765
browser_handler = ConnectionHandler(
764-
connection_port=self._connection_handler._connection_port
766+
connection_port=self._element._connection_handler._connection_port
765767
)
766768
targets_response: GetTargetsResponse = await browser_handler.execute_command(
767769
TargetCommands.get_targets()
768770
)
769771
target_infos = targets_response.get('result', {}).get('targetInfos', [])
770772

773+
# The handler that can resolve DOM.getFrameOwner for the element's context.
774+
# When the <iframe> lives inside a nested OOPIF, the Tab-level handler has
775+
# no visibility; we must route through the session that originally found
776+
# the element.
777+
owner_handler = base_handler or self._element._connection_handler
778+
owner_session_id = base_session_id
779+
771780
# Strategy 3a: Direct children (fast path)
772781
direct_children = [
773782
target_info
774783
for target_info in target_infos
775784
if target_info.get('type') in {'iframe', 'page'}
776-
and target_info.get('parentFrameId') == parent_frame_id
785+
and target_info.get('parentFrameId') == content_frame_id
777786
]
778787

779788
is_single_child = len(direct_children) == 1
@@ -803,7 +812,7 @@ async def _resolve_oopif_by_parent(
803812
# OOPIF case: confirm ownership via DOM.getFrameOwner
804813
if root_frame_id and backend_node_id is not None:
805814
owner_backend_id = await self._owner_backend_for(
806-
self._connection_handler, None, root_frame_id
815+
owner_handler, owner_session_id, root_frame_id
807816
)
808817
if owner_backend_id == backend_node_id:
809818
return (
@@ -830,10 +839,19 @@ async def _resolve_oopif_by_parent(
830839
root_frame = (frame_tree or {}).get('frame', {})
831840
root_frame_id = root_frame.get('id', '')
832841

842+
# Direct match: content_frame_id equals this target's root frame ID
843+
if root_frame_id and root_frame_id == content_frame_id:
844+
return (
845+
browser_handler,
846+
attached_session_id,
847+
root_frame_id,
848+
root_frame.get('url'),
849+
)
850+
833851
# Try matching root owner by backend_node_id
834852
if root_frame_id and backend_node_id is not None:
835853
owner_backend_id = await self._owner_backend_for(
836-
self._connection_handler, None, root_frame_id
854+
owner_handler, owner_session_id, root_frame_id
837855
)
838856
if owner_backend_id == backend_node_id:
839857
return (
@@ -843,8 +861,10 @@ async def _resolve_oopif_by_parent(
843861
root_frame.get('url'),
844862
)
845863

846-
# Fallback: match a child frame whose parentId equals parent_frame_id
847-
child_frame_id = WebElement._find_child_by_parent(frame_tree, parent_frame_id)
864+
# Fallback: match a child frame whose parentId equals content_frame_id
865+
child_frame_id = IFrameContextResolver._find_child_by_parent(
866+
frame_tree, content_frame_id
867+
)
848868
if child_frame_id:
849869
return browser_handler, attached_session_id, child_frame_id, None
850870

@@ -1133,7 +1153,7 @@ direct_children = [
11331153
target_info
11341154
for target_info in target_infos
11351155
if target_info.get('type') in {'iframe', 'page'}
1136-
and target_info.get('parentFrameId') == parent_frame_id
1156+
and target_info.get('parentFrameId') == content_frame_id
11371157
]
11381158
if direct_children:
11391159
# Attach immediately, skip scanning all targets

docs/pt/deep-dive/fundamentals/iframes-and-contexts.md

Lines changed: 43 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -322,19 +322,19 @@ Gerencia alvos (targets) do navegador (páginas, iframes, workers, etc.).
322322
**Uso no Pydoll:**
323323

324324
```python
325-
# De pydoll/elements/web_element.py (simplificado)
326-
async def _resolve_oopif_by_parent(self, parent_frame_id: str, ...):
327-
"""Resolve um OOPIF usando o ID do frame pai."""
325+
# De pydoll/interactions/iframe.py (simplificado)
326+
async def _resolve_oopif_by_parent(self, content_frame_id: str, ...):
327+
"""Resolve um OOPIF usando o content frame id."""
328328
browser_handler = ConnectionHandler(...)
329329
targets_response: GetTargetsResponse = await browser_handler.execute_command(
330330
TargetCommands.get_targets()
331331
)
332332
target_infos = targets_response.get('result', {}).get('targetInfos', [])
333-
333+
334334
# Encontra alvos cujo parentFrameId bate
335335
direct_children = [
336336
target_info for target_info in target_infos
337-
if target_info.get('parentFrameId') == parent_frame_id
337+
if target_info.get('parentFrameId') == content_frame_id
338338
]
339339

340340
if direct_children:
@@ -617,12 +617,12 @@ sequenceDiagram
617617
**Código**:
618618

619619
```python
620-
# De pydoll/elements/web_element.py
621-
async def _ensure_iframe_context(self) -> None:
622-
"""Inicializa e cacheia informação de contexto para elementos iframe."""
623-
node_info = await self._describe_node(object_id=self._object_id)
620+
# De pydoll/interactions/iframe.py
621+
async def resolve(self) -> IFrameContext:
622+
"""Resolve e retorna o contexto do iframe."""
624623
base_handler, base_session_id = self._get_base_session()
625-
frame_id, document_url, parent_frame_id, backend_node_id = self._extract_frame_metadata(
624+
node_info = await self._describe_element_node(base_handler, base_session_id)
625+
frame_id, document_url, content_frame_id, backend_node_id = self._extract_frame_metadata(
626626
node_info
627627
)
628628
# ... continua resolução
@@ -637,7 +637,7 @@ def _extract_frame_metadata(
637637
) -> tuple[Optional[str], Optional[str], Optional[str], Optional[int]]:
638638
"""Extrai metadados relacionados a iframe de um Nó DOM.describeNode."""
639639
content_document = node_info.get('contentDocument') or {}
640-
parent_frame_id = node_info.get('frameId')
640+
content_frame_id = node_info.get('frameId')
641641
backend_node_id = node_info.get('backendNodeId')
642642
frame_id = content_document.get('frameId')
643643
document_url = (
@@ -646,7 +646,7 @@ def _extract_frame_metadata(
646646
or node_info.get('documentURL')
647647
or node_info.get('baseURL')
648648
)
649-
return frame_id, document_url, parent_frame_id, backend_node_id
649+
return frame_id, document_url, content_frame_id, backend_node_id
650650
```
651651

652652
**Resultado**:
@@ -748,32 +748,41 @@ Se nenhum filho direto adequado for encontrado (ou se `parentFrameId` estiver in
748748
1. Iterar todos os alvos iframe/page.
749749
2. Anexar a cada um e buscar sua árvore de frames.
750750
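3. Primeiro, verificar um match direto: se o ID do frame raiz do alvo for igual ao nosso `content_frame_id`, usar esse alvo. Caso contrário, tentar casar o **dono do frame raiz** via `DOM.getFrameOwner(root_frame_id)` com o `backendNodeId` do iframe.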
751-
4. Se isso não bater, procurar um **frame filho** cujo `parentId` seja o `parent_frame_id` (isso cobre casos em que o OOPIF está aninhado sob um frame intermediário).
751+
4. Se isso não bater, procurar um **frame filho** cujo `parentId` seja o `content_frame_id` (isso cobre casos em que o OOPIF está aninhado sob um frame intermediário).
752752

753753
**Código**:
754754

755755
```python
756-
# De pydoll/elements/web_element.py
756+
# De pydoll/interactions/iframe.py
757757
async def _resolve_oopif_by_parent(
758758
self,
759-
parent_frame_id: str,
759+
content_frame_id: str,
760760
backend_node_id: Optional[int],
761+
base_handler: Optional[ConnectionHandler] = None,
762+
base_session_id: Optional[str] = None,
761763
) -> tuple[Optional[ConnectionHandler], Optional[str], Optional[str], Optional[str]]:
762-
"""Resolve um OOPIF usando o ID do frame pai."""
764+
"""Resolve um OOPIF usando o content frame id."""
763765
browser_handler = ConnectionHandler(
764-
connection_port=self._connection_handler._connection_port
766+
connection_port=self._element._connection_handler._connection_port
765767
)
766768
targets_response: GetTargetsResponse = await browser_handler.execute_command(
767769
TargetCommands.get_targets()
768770
)
769771
target_infos = targets_response.get('result', {}).get('targetInfos', [])
770772

773+
# O handler que pode resolver DOM.getFrameOwner para o contexto do elemento.
774+
# Quando o <iframe> está dentro de um OOPIF aninhado, o handler do Tab
775+
# não tem visibilidade; devemos rotear pela sessão que originalmente
776+
# encontrou o elemento.
777+
owner_handler = base_handler or self._element._connection_handler
778+
owner_session_id = base_session_id
779+
771780
# Estratégia 3a: Filhos diretos (caminho rápido)
772781
direct_children = [
773782
target_info
774783
for target_info in target_infos
775784
if target_info.get('type') in {'iframe', 'page'}
776-
and target_info.get('parentFrameId') == parent_frame_id
785+
and target_info.get('parentFrameId') == content_frame_id
777786
]
778787

779788
is_single_child = len(direct_children) == 1
@@ -803,7 +812,7 @@ async def _resolve_oopif_by_parent(
803812
# Caso OOPIF: confirmar propriedade via DOM.getFrameOwner
804813
if root_frame_id and backend_node_id is not None:
805814
owner_backend_id = await self._owner_backend_for(
806-
self._connection_handler, None, root_frame_id
815+
owner_handler, owner_session_id, root_frame_id
807816
)
808817
if owner_backend_id == backend_node_id:
809818
return (
@@ -830,10 +839,19 @@ async def _resolve_oopif_by_parent(
830839
root_frame = (frame_tree or {}).get('frame', {})
831840
root_frame_id = root_frame.get('id', '')
832841

842+
# Match direto: content_frame_id igual ao root frame ID do alvo
843+
if root_frame_id and root_frame_id == content_frame_id:
844+
return (
845+
browser_handler,
846+
attached_session_id,
847+
root_frame_id,
848+
root_frame.get('url'),
849+
)
850+
833851
# Tenta casar o dono do frame raiz via backend_node_id
834852
if root_frame_id and backend_node_id is not None:
835853
owner_backend_id = await self._owner_backend_for(
836-
self._connection_handler, None, root_frame_id
854+
owner_handler, owner_session_id, root_frame_id
837855
)
838856
if owner_backend_id == backend_node_id:
839857
return (
@@ -843,8 +861,10 @@ async def _resolve_oopif_by_parent(
843861
root_frame.get('url'),
844862
)
845863

846-
# Fallback: procurar frame filho cujo parentId seja parent_frame_id
847-
child_frame_id = WebElement._find_child_by_parent(frame_tree, parent_frame_id)
864+
# Fallback: procurar frame filho cujo parentId seja content_frame_id
865+
child_frame_id = IFrameContextResolver._find_child_by_parent(
866+
frame_tree, content_frame_id
867+
)
848868
if child_frame_id:
849869
return browser_handler, attached_session_id, child_frame_id, None
850870

@@ -1133,7 +1153,7 @@ direct_children = [
11331153
target_info
11341154
for target_info in target_infos
11351155
if target_info.get('type') in {'iframe', 'page'}
1136-
and target_info.get('parentFrameId') == parent_frame_id
1156+
and target_info.get('parentFrameId') == content_frame_id
11371157
]
11381158
if direct_children:
11391159
# Anexa imediatamente, pula o escaneamento de todos os alvos

0 commit comments

Comments
 (0)