Skip to content

Commit 3eefdf3

Browse files
kwschulz and claude authored
feat: add table-driven tests and URL parsing fix
- Add unit tests for headless and timeout parameters (6 tests)
- Add URL parsing tests (15 tests)
- Convert credential sanitization to table-driven (16 tests)
- Expand REDACTED_CASES and PRIVATE_IP_CASES (100 tests)
- Expand SENSITIVE_FIELD_CASES and HEADER_REDACTION_CASES (123 tests)
- Fix URL handling for `har-capture get https://example.com`
- Skip capture tests when Playwright not installed

Total: 498 tests, 68.5% coverage

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent b905bd2 commit 3eefdf3

6 files changed

Lines changed: 634 additions & 105 deletions

File tree

src/har_capture/capture/browser.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
from pathlib import Path
1616
from typing import Any
1717

18-
from har_capture.capture.connectivity import check_device_connectivity
18+
from har_capture.capture.connectivity import _parse_target, check_device_connectivity
1919
from har_capture.capture.deps import check_playwright, install_browser_deps
2020
from har_capture.patterns import get_bloat_extensions
2121

@@ -269,16 +269,19 @@ def capture_device_har(
269269
if output_path.suffix != ".har":
270270
output_path = output_path.with_suffix(".har")
271271

272+
# Parse target to extract hostname (handles URLs like "https://example.com")
273+
host, _ = _parse_target(ip)
274+
272275
# Check connectivity and determine scheme
273276
reachable, scheme, error = check_device_connectivity(ip)
274277
if not reachable:
275278
return CaptureResult(
276279
har_path=output_path,
277280
success=False,
278-
error=error or f"Cannot connect to device at {ip}",
281+
error=error or f"Cannot connect to {host}",
279282
)
280283

281-
device_url = f"{scheme}://{ip}/"
284+
target_url = f"{scheme}://{host}/"
282285

283286
def launch_browser_and_capture() -> bool:
284287
"""Launch browser and capture HAR. Returns True on success."""
@@ -313,7 +316,7 @@ def launch_browser_and_capture() -> bool:
313316

314317
# Create page and navigate to device
315318
page = context.new_page()
316-
page.goto(device_url, wait_until="networkidle")
319+
page.goto(target_url, wait_until="networkidle")
317320

318321
if timeout is not None:
319322
# Automated mode: wait for timeout then close

src/har_capture/capture/connectivity.py

Lines changed: 50 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
1-
"""Device connectivity checking utilities.
1+
"""Target connectivity checking utilities.
22
3-
This module provides functions to check device reachability and authentication
3+
This module provides functions to check target reachability and authentication
44
requirements before launching the browser capture.
55
"""
66

@@ -10,27 +10,64 @@
1010
import ssl
1111
import urllib.error
1212
import urllib.request
13+
from urllib.parse import urlparse
1314

1415
_LOGGER = logging.getLogger(__name__)
1516

1617

17-
def check_device_connectivity(ip: str, timeout: int = 5) -> tuple[bool, str, str | None]:
18-
"""Check if device is reachable and determine the correct URL scheme.
18+
def _parse_target(target: str) -> tuple[str, str | None]:
19+
"""Parse a target string into hostname and optional scheme.
1920
20-
Tries HTTP first, then HTTPS if HTTP fails.
21+
Handles various input formats:
22+
- Full URL: "https://example.com" -> ("example.com", "https")
23+
- URL with path: "https://example.com/page" -> ("example.com", "https")
24+
- Hostname only: "example.com" -> ("example.com", None)
25+
- IP address: "192.168.1.1" -> ("192.168.1.1", None)
26+
- IP with port: "192.168.1.1:8080" -> ("192.168.1.1:8080", None)
2127
2228
Args:
23-
ip: Device IP address
29+
target: URL, hostname, or IP address
30+
31+
Returns:
32+
Tuple of (hostname_with_port, scheme_or_none)
33+
"""
34+
# Check if it looks like a URL (has scheme)
35+
if "://" in target:
36+
parsed = urlparse(target)
37+
host = parsed.netloc or parsed.path.split("/")[0]
38+
return host, parsed.scheme
39+
# No scheme - return as-is
40+
return target, None
41+
42+
43+
def check_device_connectivity(target: str, timeout: int = 5) -> tuple[bool, str, str | None]:
44+
"""Check if target is reachable and determine the correct URL scheme.
45+
46+
Tries the provided scheme first (if any), otherwise tries HTTP then HTTPS.
47+
48+
Args:
49+
target: URL, hostname, or IP address (e.g., "example.com", "https://example.com", "192.168.1.1")
2450
timeout: Connection timeout in seconds
2551
2652
Returns:
2753
Tuple of (reachable, scheme, error_message)
28-
- reachable: True if device responded
54+
- reachable: True if target responded
2955
- scheme: "http" or "https"
3056
- error_message: None if reachable, otherwise describes the problem
3157
"""
32-
for scheme in ["http", "https"]:
33-
url = f"{scheme}://{ip}/"
58+
# Parse target to extract hostname and any provided scheme
59+
host, provided_scheme = _parse_target(target)
60+
61+
# Determine which schemes to try
62+
if provided_scheme in ("http", "https"):
63+
schemes_to_try = [provided_scheme]
64+
else:
65+
schemes_to_try = ["http", "https"]
66+
67+
last_error: str | None = None
68+
69+
for scheme in schemes_to_try:
70+
url = f"{scheme}://{host}/"
3471
try:
3572
req = urllib.request.Request(url, method="GET")
3673
if scheme == "https":
@@ -43,17 +80,15 @@ def check_device_connectivity(ip: str, timeout: int = 5) -> tuple[bool, str, str
4380
urllib.request.urlopen(req, timeout=timeout)
4481
return True, scheme, None
4582
except urllib.error.HTTPError:
46-
# HTTP error means device is reachable (might need auth, that's fine)
83+
# HTTP error means target is reachable (might need auth, that's fine)
4784
return True, scheme, None
4885
except urllib.error.URLError as e:
4986
# Connection refused, timeout, etc - try next scheme
50-
if scheme == "https":
51-
return False, "http", f"Cannot connect to device at {ip}: {e.reason}"
87+
last_error = str(e.reason)
5288
except Exception as e:
53-
if scheme == "https":
54-
return False, "http", f"Cannot connect to device at {ip}: {e}"
89+
last_error = str(e)
5590

56-
return False, "http", f"Cannot connect to device at {ip}"
91+
return False, schemes_to_try[0], f"Cannot connect to {host}: {last_error}"
5792

5893

5994
def check_basic_auth(url: str, timeout: int = 5) -> tuple[bool, str | None]:

0 commit comments

Comments
 (0)