Skip to content

Commit 480f277

Browse files
committed
Improve XPath export quoting
1 parent 4ab3555 commit 480f277

2 files changed

Lines changed: 21 additions & 3 deletions

File tree

autoscraper/auto_scraper.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -737,10 +737,19 @@ def _stack_to_xpath(stack):
737737
if not val:
738738
continue
739739
if isinstance(val, (list, tuple)):
740-
val = " ".join(str(v).strip() for v in val)
740+
val = " ".join(str(v) for v in val)
741741
if isinstance(val, str):
742-
val = val.strip()
743-
attr_conditions.append(f"@{attr}=\"{val}\"")
742+
val = val
743+
744+
if '"' in val and "'" in val:
745+
parts = val.split('"')
746+
quoted = 'concat(' + ', "\"", '.join(f'"{p}"' for p in parts[:-1]) + (', ' if len(parts) > 1 else '') + f'"{parts[-1]}")'
747+
elif '"' in val:
748+
quoted = f"'{val}'"
749+
else:
750+
quoted = f'"{val}"'
751+
752+
attr_conditions.append(f"@{attr}={quoted}")
744753
cond = ""
745754
if attr_conditions:
746755
cond = "[" + " and ".join(attr_conditions) + "]"

tests/unit/test_additional_features.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,3 +47,12 @@ def test_get_rule_xpaths():
4747
rule_id = scraper.stack_list[0]["stack_id"]
4848
xpaths = scraper.get_rule_xpaths()
4949
assert xpaths[rule_id] == "/ul[1]/li[1]"
50+
51+
52+
def test_get_rule_xpaths_preserves_spaces():
    """Exported XPath keeps an attribute value's surrounding whitespace.

    Builds a scraper from a single ``div`` whose ``style`` attribute has
    leading and trailing spaces, then checks that the XPath exported for
    the first learned rule reproduces that attribute value verbatim.
    """
    markup = '<div style=" color: red; ">t</div>'
    expected_xpath = '/div[@style=" color: red; "][1]'

    auto = AutoScraper()
    auto.build(html=markup, wanted_list=["t"])

    # Look up the exported XPath by the id of the first learned rule.
    first_rule = auto.stack_list[0]["stack_id"]
    exported = auto.get_rule_xpaths()
    assert exported[first_rule] == expected_xpath

0 commit comments

Comments
 (0)