Skip to content

Commit fe41ddb

Browse files
committed
Support attributes in HTML paths in style mappings
1 parent d037ef5 commit fe41ddb

File tree

6 files changed, with 102 additions (+102) and 34 deletions (-34).

NEWS

+4
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,7 @@
1+
# 1.7.0
2+
3+
* Support attributes in HTML paths in style mappings.
4+
15
# 1.6.0
26

37
* Support merged paragraphs when revisions are tracked.

README.md

+6
Original file line number | Diff line number | Diff line change
@@ -649,6 +649,12 @@ append a dot followed by the name of the class:
649649
h1.section-title
650650
```
651651

652+
To add an attribute, use square brackets similarly to a CSS attribute selector:
653+
654+
```
655+
p[lang='fr']
656+
```
657+
652658
To require that an element is fresh, use `:fresh`:
653659

654660
```

mammoth/html_paths.py

+7-6
Original file line number | Diff line number | Diff line change
@@ -7,15 +7,16 @@ def path(elements):
77
return HtmlPath(elements)
88

99

10-
def element(names, class_names=None, fresh=None, separator=None):
10+
def element(names, attributes=None, class_names=None, fresh=None, separator=None):
11+
if attributes is None:
12+
attributes = {}
1113
if class_names is None:
1214
class_names = []
1315
if fresh is None:
1416
fresh = False
1517
if class_names:
16-
attributes = {"class": " ".join(class_names)}
17-
else:
18-
attributes = {}
18+
attributes["class"] = " ".join(class_names)
19+
1920
return HtmlPathElement(html.tag(
2021
tag_names=names,
2122
attributes=attributes,
@@ -27,13 +28,13 @@ def element(names, class_names=None, fresh=None, separator=None):
2728
@cobble.data
2829
class HtmlPath(object):
2930
elements = cobble.field()
30-
31+
3132
def wrap(self, generate_nodes):
3233
nodes = generate_nodes()
3334

3435
for element in reversed(self.elements):
3536
nodes = element.wrap_nodes(nodes)
36-
37+
3738
return nodes
3839

3940

mammoth/styles/parser/html_path_parser.py

+57-17
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,15 @@
1+
import cobble
2+
13
from ... import html_paths
24
from .tokeniser import TokenType
3-
from .token_parser import parse_identifier, parse_string, try_parse_class_name
5+
from .token_parser import parse_identifier, parse_string
6+
7+
8+
@cobble.data
9+
class _AttributeOrClassName(object):
10+
name = cobble.field()
11+
value = cobble.field()
12+
append = cobble.field()
413

514

615
def parse_html_path(tokens):
@@ -12,51 +21,82 @@ def parse_html_path(tokens):
1221

1322
def _parse_html_path_elements(tokens):
1423
elements = []
15-
24+
1625
if tokens.peek_token_type() == TokenType.IDENTIFIER:
1726
elements.append(_parse_element(tokens))
18-
27+
1928
while tokens.try_skip_many(((TokenType.WHITESPACE, None), (TokenType.SYMBOL, ">"))):
2029
tokens.skip(TokenType.WHITESPACE)
2130
elements.append(_parse_element(tokens))
22-
31+
2332
return elements
2433

2534

2635
def _parse_element(tokens):
2736
tag_names = _parse_tag_names(tokens)
28-
class_names = _parse_class_names(tokens)
37+
attributes_list = _parse_attribute_or_class_names(tokens)
2938
is_fresh = _parse_is_fresh(tokens)
3039
separator = _parse_separator(tokens)
31-
40+
41+
attributes = {}
42+
for attribute in attributes_list:
43+
if attribute.append and attributes.get(attribute.name):
44+
attributes[attribute.name] += " " + attribute.value
45+
else:
46+
attributes[attribute.name] = attribute.value
47+
3248
return html_paths.element(
3349
tag_names,
34-
class_names=class_names,
50+
attributes=attributes,
3551
fresh=is_fresh,
3652
separator=separator,
3753
)
3854

3955

4056
def _parse_tag_names(tokens):
4157
tag_names = [parse_identifier(tokens)]
42-
58+
4359
while tokens.try_skip(TokenType.SYMBOL, "|"):
4460
tag_names.append(parse_identifier(tokens))
45-
61+
4662
return tag_names
4763

4864

49-
def _parse_class_names(tokens):
50-
class_names = []
51-
65+
def _parse_attribute_or_class_names(tokens):
66+
attribute_or_class_names = []
67+
5268
while True:
53-
class_name = try_parse_class_name(tokens)
54-
if class_name is None:
69+
attribute_or_class_name = _try_parse_attribute_or_class_name(tokens)
70+
if attribute_or_class_name is None:
5571
break
5672
else:
57-
class_names.append(class_name)
58-
59-
return class_names
73+
attribute_or_class_names.append(attribute_or_class_name)
74+
75+
return attribute_or_class_names
76+
77+
78+
def _try_parse_attribute_or_class_name(tokens):
79+
if tokens.is_next(TokenType.SYMBOL, "["):
80+
return _parse_attribute(tokens)
81+
if tokens.is_next(TokenType.SYMBOL, "."):
82+
return _parse_class_name(tokens)
83+
else:
84+
return None
85+
86+
87+
def _parse_attribute(tokens):
88+
tokens.skip(TokenType.SYMBOL, "[")
89+
name = parse_identifier(tokens)
90+
tokens.skip(TokenType.SYMBOL, "=")
91+
value = parse_string(tokens)
92+
tokens.skip(TokenType.SYMBOL, "]")
93+
return _AttributeOrClassName(name=name, value=value, append=False)
94+
95+
96+
def _parse_class_name(tokens):
97+
tokens.skip(TokenType.SYMBOL, ".")
98+
class_name = parse_identifier(tokens)
99+
return _AttributeOrClassName(name="class", value=class_name, append=True)
60100

61101

62102
def _parse_is_fresh(tokens):

mammoth/styles/parser/token_iterator.py

+14-11
Original file line number | Diff line number | Diff line change
@@ -8,37 +8,36 @@ class TokenIterator(object):
88
def __init__(self, tokens):
99
self._tokens = tokens
1010
self._index = 0
11-
11+
1212
def peek_token_type(self):
1313
return self._tokens[self._index].type
14-
14+
1515
def next_value(self, token_type=None):
1616
return self._next(token_type).value
17-
17+
1818
def _next(self, token_type=None):
1919
token = self._tokens[self._index]
2020
if token_type is None or token.type == token_type:
2121
self._index += 1
2222
return token
2323
else:
2424
raise self._unexpected_token_type(token_type, token)
25-
25+
2626
def skip(self, token_type, token_value=None):
2727
token = self._tokens[self._index]
2828
if token.type == token_type and (token_value is None or token.value == token_value):
2929
self._index += 1
3030
return True
3131
else:
3232
raise self._unexpected_token_type(token_type, token)
33-
33+
3434
def try_skip(self, token_type, token_value=None):
35-
token = self._tokens[self._index]
36-
if token.type == token_type and (token_value is None or token.value == token_value):
35+
if self.is_next(token_type, token_value):
3736
self._index += 1
3837
return True
3938
else:
4039
return False
41-
40+
4241
def try_skip_many(self, tokens):
4342
start = self._index
4443
for token_type, token_value in tokens:
@@ -48,9 +47,13 @@ def try_skip_many(self, tokens):
4847
return False
4948
else:
5049
self._index += 1
51-
50+
5251
return True
53-
52+
53+
def is_next(self, token_type, token_value=None):
54+
token = self._tokens[self._index]
55+
return token.type == token_type and (token_value is None or token.value == token_value)
56+
5457
def _unexpected_token_type(self, token_type, token):
5558
raise LineParseError()
56-
59+

tests/styles/parser/html_path_parser_tests.py

+14
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,20 @@ def test_can_read_multiple_classes_on_element():
5353
)
5454

5555

56+
def test_can_read_attribute_on_element():
57+
assert_equal(
58+
html_paths.path([html_paths.element(["p"], attributes={"lang": "fr"})]),
59+
read_html_path("p[lang='fr']")
60+
)
61+
62+
63+
def test_can_read_multiple_attributes_on_element():
64+
assert_equal(
65+
html_paths.path([html_paths.element(["p"], attributes={"lang": "fr", "data-x": "y"})]),
66+
read_html_path("p[lang='fr'][data-x='y']")
67+
)
68+
69+
5670
def test_can_read_when_element_must_be_fresh():
5771
assert_equal(
5872
html_paths.path([html_paths.element(["p"], fresh=True)]),

0 commit comments

Comments
 (0)