Skip to content

Commit

Permalink
Improve xpath patterns for spaces after comma in xpath
Browse files Browse the repository at this point in the history
  • Loading branch information
maledorak committed Jan 25, 2025
1 parent bc5429d commit 15be547
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 11 deletions.
6 changes: 3 additions & 3 deletions emmetify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,21 @@ def emmetify_html(content, format="html", **options):
return emmetifier.emmetify(content)


def emmetify_compact_html(content, **options):
def emmetify_compact_html(content):
"""Convenience function for quick HTML conversion with simplified tags and attributes"""
emmetifier = Emmetifier.create(
emmetifier = Emmetifier(
format="html",
config={
"html": {
"skip_tags": True,
"prioritize_attributes": True,
"simplify_classes": True,
"simplify_images": True,
# LLM agents work better when they know the relative links; otherwise they start looping on redirects
"simplify_relative_links": False,
"simplify_absolute_links": True,
}
},
**options,
)
return emmetifier.emmetify(content)

Expand Down
14 changes: 7 additions & 7 deletions emmetify/utils/xpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ def restore_attribute_in_xpath(xpath: str, tag: str, attr: str, replace_map: dic
rf"@{attr}=(?P<quote>['\"])(?P<value>.*?)(?P=quote)",
rf"@{attr.lower()}=(?P<quote>['\"])(?P<value>.*?)(?P=quote)",
# Contains function
rf"contains\(@{attr},(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
rf"contains\(@{attr}\s*,\s*(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
# Normalize-space function
rf"normalize-space\(@{attr}\)=(?P<quote>['\"])(?P<value>.*?)(?P=quote)",
# Functions with normalize-space
rf"contains\(normalize-space\(@{attr}\),(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
rf"contains\(normalize-space\(@{attr}\)\s*,\s*(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
# Nested functions with normalize-space
rf"ends-with\(normalize-space\(@{attr}\),(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
rf"starts-with\(normalize-space\(@{attr}\),(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
rf"ends-with\(normalize-space\(@{attr}\)\s*,\s*(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
rf"starts-with\(normalize-space\(@{attr}\)\s*,\s*(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
# Other functions
rf"starts-with\(@{attr},(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
rf"ends-with\(@{attr},(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
rf"matches\(@{attr},(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
rf"starts-with\(@{attr}\s*,\s*(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
rf"ends-with\(@{attr}\s*,\s*(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
rf"matches\(@{attr}\s*,\s*(?P<quote>['\"])(?P<value>.*?)(?P=quote)\)",
]

# Compile patterns with case-insensitive matching
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "emmetify"
version = "0.0.1-rc.4"
version = "0.0.1-rc.5"
description = "Cut LLM costs and boost processing speed by transforming verbose HTML into efficient Emmet notation"
authors = ["Mariusz Korzekwa <[email protected]>"]
repository = "https://github.com/emmetify/emmetify-py"
Expand Down
21 changes: 21 additions & 0 deletions tests/utils/xpath_restore_classes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ def test_class_with_multiple_predicates_and_functions(self):
result = restore_classes_in_xpath(xpath, replace_map)
self.assertEqual(result, expected)

def test_class_with_multiple_predicates_and_functions_with_space(self):
xpath = "//*[@id='test' and contains(@class, 'john')]"
replace_map = {"john": "example-class"}
expected = "//*[@id='test' and contains(@class, 'example-class')]"
result = restore_classes_in_xpath(xpath, replace_map)
self.assertEqual(result, expected)

def test_complex_xpath_expression(self):
xpath = "//div//*[@class='john' and @data-id='123']//span[text()='Click here']"
replace_map = {"john": "example-class"}
Expand Down Expand Up @@ -158,6 +165,13 @@ def test_class_with_function_inside_predicate(self):
result = restore_classes_in_xpath(xpath, replace_map)
self.assertEqual(result, expected)

def test_class_with_function_inside_predicate_with_space(self):
xpath = "//*[@class=concat('john', 'doe')]"
replace_map = {"johndoe": "example-class"}
expected = "//*[@class=concat('john', 'doe')]"
result = restore_classes_in_xpath(xpath, replace_map)
self.assertEqual(result, expected)

def test_class_with_variable_in_predicate(self):
xpath = "//*[@class=$john]"
replace_map = {"$john": "example-class"}
Expand All @@ -179,6 +193,13 @@ def test_class_with_comment_in_predicate(self):
result = restore_classes_in_xpath(xpath, replace_map)
self.assertEqual(result, expected)

def test_class_with_double_slash_and_index(self):
xpath = "//footer//div[contains(@class, 'omi')]/text()[2]"
replace_map = {"omi": "example-class"}
expected = "//footer//div[contains(@class, 'example-class')]/text()[2]"
result = restore_classes_in_xpath(xpath, replace_map)
self.assertEqual(result, expected)

def test_class_with_case_sensitive_tag_name(self):
xpath = "//DIV[@class='john']"
replace_map = {"john": "example-class"}
Expand Down

0 comments on commit 15be547

Please sign in to comment.