Skip to content

Commit 4c5e3a3

Browse files
authored
Merge pull request #366 from ma10/freee_a11y_gl-rst-processing-20250627
Improve the rst normalization.
2 parents 1c74d90 + f6d8b57 commit 4c5e3a3

File tree

2 files changed

+27
-6
lines changed

2 files changed

+27
-6
lines changed

tools/lib/freee_a11y_gl/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "freee_a11y_gl"
7-
version = "0.1.2"
7+
version = "0.1.3"
88
description = "A module to process a11y guidelines data"
99
authors = [
1010
{name = "Masafumi NAKANE", email = "[email protected]"}

tools/lib/freee_a11y_gl/src/freee_a11y_gl/yaml_processor/rst_processor.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,33 @@ def normalize_text(text: str) -> str:
2121
fullwidth_chars = r'[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF]'
2222
halfwidth_chars = r'[\u0000-\u007F\uFF61-\uFFDC\uFFE8-\uFFEE]'
2323

24-
# Remove whitespaces between fullwidth chars
25-
text = re.sub(rf'({fullwidth_chars})\s+({fullwidth_chars})', r'\1\2', text)
24+
# Define whitespace pattern excluding line feeds (\n); note that \r, \u2028 and \u2029 still match
25+
whitespace_no_newline = r'[ \t\f\v\r\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000]+'
2626

27-
# Remove whitespaces between halfwidth chars and full width chars
28-
text = re.sub(rf'({fullwidth_chars})\s+({halfwidth_chars})', r'\1\2', text)
29-
text = re.sub(rf'({halfwidth_chars})\s+({fullwidth_chars})', r'\1\2', text)
27+
# Remove whitespaces (excluding newlines) between fullwidth chars
28+
text = re.sub(rf'({fullwidth_chars}){whitespace_no_newline}({fullwidth_chars})', r'\1\2', text)
29+
30+
# Remove whitespaces (excluding newlines) between a fullwidth char and a following halfwidth char
31+
# (bullet markers are protected separately in the halfwidth-to-fullwidth step below)
32+
text = re.sub(rf'({fullwidth_chars}){whitespace_no_newline}({halfwidth_chars})', r'\1\2', text)
33+
34+
# For halfwidth to fullwidth, use a different approach to preserve bullet points
35+
# First, temporarily replace the spaces that follow a line-leading bullet marker (*, -, +) with placeholders
36+
bullet_pattern = re.compile(r'^([ \t]*[*\-+])( +)', re.MULTILINE)
37+
bullet_matches = []
38+
39+
def bullet_replacer(match):
40+
bullet_matches.append(match.group(2)) # Store the spaces
41+
return match.group(1) + f'__BULLET_SPACE_{len(bullet_matches)-1}__'
42+
43+
text = bullet_pattern.sub(bullet_replacer, text)
44+
45+
# Now remove spaces between halfwidth and fullwidth chars
46+
text = re.sub(rf'({halfwidth_chars}){whitespace_no_newline}({fullwidth_chars})', r'\1\2', text)
47+
48+
# Restore bullet point spaces
49+
for i, spaces in enumerate(bullet_matches):
50+
text = text.replace(f'__BULLET_SPACE_{i}__', spaces)
3051

3152
return text
3253

0 commit comments

Comments
 (0)