Skip to content

Commit 3250f72

Browse files
authored
Merge pull request #279 from ma10/refactor-yaml2json-20250219
yaml2json.pyのリファクタリング
2 parents 79b0798 + a6f9650 commit 3250f72

File tree

6 files changed

+387
-121
lines changed

6 files changed

+387
-121
lines changed

tools/yaml2x/yaml2json/__init__.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
"""
2+
YAML to JSON converter package for accessibility guidelines.
3+
4+
This package provides functionality to convert YAML-based accessibility
5+
guidelines into JSON format, handling RST markup and multilingual content.
6+
"""
7+
8+
from . import config, utils, rst_processor
9+
from .yaml2json import main
10+
11+
__all__ = [
12+
'main', # Main conversion function
13+
'config', # Configuration handling
14+
'utils', # Utility functions
15+
'rst_processor', # RST markup processing
16+
]
17+
18+
# Version information
19+
__version__ = '0.1.0'

tools/yaml2x/yaml2json/config.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
"""
2+
Configuration module for YAML to JSON conversion.
3+
4+
This module handles command-line argument parsing and configuration
5+
setup for the YAML to JSON conversion process.
6+
"""
7+
8+
import argparse
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
11+
12+
# Default values
13+
DEFAULT_OUTPUT_FILE: str = 'data.json'
14+
DEFAULT_BASE_URL: str = ''
15+
DEFAULT_BASE_DIR: str = '.'
16+
17+
class ConfigError(Exception):
    """Exception type used to report configuration-related failures."""
20+
21+
def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """
    Parse command-line arguments for the conversion process.

    Args:
        argv: Argument strings to parse. When None (the default) argparse
            reads them from sys.argv[1:], so existing zero-argument callers
            behave exactly as before. Passing an explicit list allows the
            parser to be driven programmatically.

    Returns:
        Namespace with the attributes basedir, output_file, base_url
        and publish
    """
    parser = argparse.ArgumentParser(
        description="Process YAML files and generate a JSON file containing checklist items."
    )
    parser.add_argument(
        '--basedir', '-b',
        type=str,
        default=DEFAULT_BASE_DIR,
        help='Base directory where the data directory is located.'
    )
    parser.add_argument(
        '--output-file', '-o',
        type=str,
        default=DEFAULT_OUTPUT_FILE,
        help='Output file path.'
    )
    parser.add_argument(
        '--base-url', '-u',
        type=str,
        default=DEFAULT_BASE_URL,
        help='Base URL for the links to related information.'
    )
    parser.add_argument(
        '--publish', '-p',
        action='store_true',
        help='Generate for publishing'
    )
    return parser.parse_args(argv)
55+
56+
def process_arguments(args: argparse.Namespace) -> Dict[str, Any]:
    """
    Process the command-line arguments and validate paths.

    The base directory is resolved to an absolute path and must already
    exist. A relative output path is interpreted relative to the base
    directory; an absolute output path is used unchanged. The parent
    directory of the output file is created if it is missing.

    Args:
        args: Parsed command-line arguments (basedir, output_file,
            base_url, publish)

    Returns:
        Dictionary containing validated settings under the keys
        'basedir', 'output_file', 'base_url' and 'publish'

    Raises:
        ConfigError: If the base directory does not exist, or if any other
            error occurs while processing the arguments (the original
            exception is attached as the cause)
    """
    try:
        basedir = Path(args.basedir).resolve()
        if not basedir.is_dir():
            raise ConfigError(f"Base directory does not exist: {basedir}")

        output_file = Path(args.output_file)
        if not output_file.is_absolute():
            output_file = basedir / output_file

        # Ensure output directory exists
        output_file.parent.mkdir(parents=True, exist_ok=True)

        return {
            'basedir': str(basedir),
            'output_file': str(output_file),
            'base_url': args.base_url,
            'publish': args.publish
        }
    except ConfigError:
        # Already a meaningful configuration error; do not wrap it again.
        raise
    except Exception as e:
        # Callers only expect ConfigError from this function, so translate
        # everything else while keeping the original exception as the cause.
        raise ConfigError(f"Error processing arguments: {str(e)}") from e
90+
91+
def setup_configuration() -> Dict[str, Any]:
    """
    Build the configuration from the command line.

    Parses the command-line arguments with parse_args() and passes the
    result through process_arguments().

    Returns:
        Dict of settings as returned by process_arguments()

    Raises:
        ConfigError: Propagated from process_arguments() when the
            configuration is invalid
    """
    return process_arguments(parse_args())
Lines changed: 27 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,85 +1,38 @@
1-
import os
2-
import argparse
3-
import pickle
1+
"""
2+
Legacy compatibility module for YAML to JSON conversion.
43
5-
LANGUAGES = ['ja', 'en']
6-
PICKLE_PATH = 'build/doctrees/environment.pickle'
4+
This module provides backward compatibility by re-exporting functionality
5+
that has been moved to more specialized modules (config.py and utils.py).
6+
New code should use those modules directly instead.
77
8-
def setup_parameters():
9-
args = parse_args()
10-
return process_arguments(args)
8+
@deprecated: Use config.py and utils.py modules instead
9+
"""
1110

12-
def parse_args():
13-
parser = argparse.ArgumentParser(description="Process YAML files and generate a JSON file containing checklist items.")
14-
parser.add_argument('--basedir', '-b', type=str, default='.', help='Base directory where the data directory is located.')
15-
parser.add_argument('--output-file', '-o', type=str, default='data.json', help='Output file path.')
16-
parser.add_argument('--base-url', '-u', type=str, default='', help='Base URL for the links to related information.')
17-
parser.add_argument('--publish', '-p', action='store_true', help='Generate for publishing')
18-
return parser.parse_args()
11+
from typing import Dict, Any
1912

20-
def process_arguments(args):
21-
"""
22-
Process the command-line arguments to determine the build mode, target files, and other options.
13+
from . import config, utils
2314

24-
Args:
25-
args: The parsed command-line arguments.
15+
# Re-export constants for backward compatibility
16+
LANGUAGES = utils.LANGUAGES
17+
PICKLE_PATH = utils.PICKLE_PATH
2618

27-
Returns:
28-
A dictionary containing settings derived from the command-line arguments.
29-
"""
30-
basedir = os.path.abspath(args.basedir)
31-
if os.path.isabs(args.output_file):
32-
output_file = args.output_file
33-
elif not os.path.dirname(args.output_file):
34-
output_file = os.path.join(basedir, args.output_file)
35-
else:
36-
output_file = os.path.abspath(args.output_file)
37-
return {
38-
'basedir': basedir,
39-
'output_file': output_file,
40-
'base_url': args.base_url,
41-
'publish': args.publish
42-
}
19+
# Re-export exception for backward compatibility
20+
InitializerError = config.ConfigError
4321

44-
def get_info_links(basedir, baseurl = ''):
22+
def setup_parameters() -> Dict[str, Any]:
4523
"""
46-
Extract the labels from the environment pickle file.
47-
48-
Args:
49-
basedir: The project root directory where the data directory is located for each language.
50-
baseurl: The base URL for the links to related information.
51-
52-
Returns:
53-
A dictionary containing the labels extracted from the environment pickle file.
24+
@deprecated: Use config.setup_configuration() instead
5425
"""
55-
info = {}
56-
path_prefix = {
57-
'ja': '',
58-
'en': 'en/'
59-
}
60-
for lang in LANGUAGES:
61-
pickle_file = os.path.abspath(os.path.join(basedir, lang, PICKLE_PATH))
62-
try:
63-
with open(pickle_file, 'rb') as f:
64-
doctree = pickle.load(f)
65-
except Exception as e:
66-
raise Exception(f'Error loading environment pickle file: {pickle_file}') from e
67-
labels = doctree.domaindata['std']['labels']
68-
for label in labels:
69-
if labels[label][0] == '' or labels[label][1] == '' or labels[label][2] == '':
70-
continue
71-
if label not in info:
72-
info[label] = {
73-
'text': {},
74-
'url': {}
75-
}
76-
info[label]['text'][lang] = labels[label][2]
77-
info[label]['url'][lang] = f'{baseurl}/{path_prefix[lang]}{labels[label][0]}.html#{labels[label][1]}'
26+
return config.setup_configuration()
7827

79-
return info
28+
def get_info_links(basedir: str, baseurl: str = '') -> Dict[str, Any]:
    """
    Backward-compatibility shim that forwards to utils.get_info_links().

    @deprecated: Use utils.get_info_links() instead
    """
    links = utils.get_info_links(basedir, baseurl)
    return links
8033

81-
def version_info(basedir):
82-
version_info = {}
83-
with open(os.path.join(basedir, 'version.py'), encoding='utf-8') as f:
84-
exec(f.read(), version_info)
85-
return version_info
34+
def version_info(basedir: str) -> Dict[str, str]:
    """
    Backward-compatibility shim that forwards to utils.get_version_info().

    @deprecated: Use utils.get_version_info() instead
    """
    info = utils.get_version_info(basedir)
    return info
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
"""
2+
RST text processing module.
3+
4+
This module handles the processing of RST markup in text content,
5+
including references, keyboard shortcuts, and text width formatting.
6+
"""
7+
8+
import re
9+
from typing import Dict, Any
10+
11+
# Regular expression patterns
12+
RST_REF_PATTERN = re.compile(r':ref:`([-a-z0-9]+)`') # Match reference IDs
13+
# Non-greedy on purpose: with a greedy `.+`, a line holding several :kbd:
# roles would be matched as one span from the first opening backtick to the
# last closing backtick, leaving role markup inside the captured text.
RST_KBD_PATTERN = re.compile(r':kbd:`(.+?)`')  # Match keyboard shortcuts
14+
15+
def normalize_text(text: str) -> str:
    """
    Normalize whitespace and spacing between characters.

    Leading and trailing whitespace is stripped. Whitespace runs are then
    removed when they sit between two full-width characters, or between a
    full-width and a half-width character (in either order). Whitespace
    between two half-width characters is left untouched.

    Args:
        text: The text to normalize

    Returns:
        The normalized text
    """
    # Remove leading and trailing whitespaces
    text = text.strip()

    # Define regexp for half and full width chars
    fullwidth_chars = r'[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF]'
    halfwidth_chars = r'[\u0000-\u007F\uFF61-\uFFDC\uFFE8-\uFFEE]'

    # The neighbouring characters are checked with zero-width lookarounds
    # instead of capture groups. Capturing them would consume the right-hand
    # character, so it could not serve as the left-hand side of the next
    # match and every second gap in a run such as "A B C" would be skipped.

    # Remove whitespaces between fullwidth chars
    text = re.sub(rf'(?<={fullwidth_chars})\s+(?={fullwidth_chars})', '', text)

    # Remove whitespaces between halfwidth chars and full width chars
    text = re.sub(rf'(?<={fullwidth_chars})\s+(?={halfwidth_chars})', '', text)
    text = re.sub(rf'(?<={halfwidth_chars})\s+(?={fullwidth_chars})', '', text)

    return text
32+
33+
def process_rst_text(text: str, info: Dict[str, Any], lang: str) -> str:
    """
    Process RST markup text by replacing references and keyboard shortcuts.

    Each :ref: role matched by RST_REF_PATTERN is replaced with
    info[label]['text'][lang]; each :kbd: role matched by RST_KBD_PATTERN
    is replaced with its content. For Japanese ('ja') the result is
    additionally passed through normalize_text().

    Args:
        text: The RST text to process
        info: Dictionary containing reference information, keyed by label
        lang: Language code (e.g. 'en', 'ja')

    Returns:
        Processed text with RST markup replaced
    """
    def ref_replace(match):
        """Replace reference with its text."""
        try:
            return info[match.group(1)]['text'][lang]
        except KeyError:
            # Keep the original markup when the label is unknown or has no
            # text registered for this language, instead of aborting the
            # whole conversion with a bare KeyError.
            return match.group(0)

    # Replace references
    text = RST_REF_PATTERN.sub(ref_replace, text)

    # Replace keyboard shortcuts
    text = RST_KBD_PATTERN.sub(lambda m: m.group(1), text)

    # Only normalize spacing for Japanese text
    if lang == 'ja':
        text = normalize_text(text)
    return text
62+
63+
def process_rst_condition(condition: Dict[str, Any], info: Dict[str, Any]) -> Dict[str, Any]:
    """
    Replace RST markup inside a condition tree, in place.

    A condition whose 'type' is 'simple' has each language entry under
    condition['procedure']['procedure'] run through process_rst_text()
    (when a 'procedure' key is present). Any other condition is treated
    as a container and its 'conditions' list is processed recursively.

    Args:
        condition: Dictionary containing condition data
        info: Dictionary containing reference information

    Returns:
        The same condition dictionary, with RST markup replaced
    """
    if condition['type'] != 'simple':
        # Composite node: rebuild the child list from the processed children
        processed = []
        for child in condition['conditions']:
            processed.append(process_rst_condition(child, info))
        condition['conditions'] = processed
        return condition

    if 'procedure' in condition:
        texts = condition['procedure']['procedure']
        for lang in texts:
            texts[lang] = process_rst_text(texts[lang], info, lang)
    return condition

0 commit comments

Comments
 (0)