Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions tools/yaml2x/yaml2json/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
YAML to JSON converter package for accessibility guidelines.

This package provides functionality to convert YAML-based accessibility
guidelines into JSON format, handling RST markup and multilingual content.
"""

from . import config, utils, rst_processor
from .yaml2json import main

# Names exposed by a star-import of this package: the conversion entry point
# followed by the configuration, utility and RST-processing submodules.
__all__ = ['main', 'config', 'utils', 'rst_processor']

# Package version string
__version__ = '0.1.0'
102 changes: 102 additions & 0 deletions tools/yaml2x/yaml2json/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""
Configuration module for YAML to JSON conversion.

This module handles command-line argument parsing and configuration
setup for the YAML to JSON conversion process.
"""

import argparse
from pathlib import Path
from typing import Any, Dict, Optional, Sequence

# Fallbacks applied by the argument parser when an option is not supplied
DEFAULT_BASE_DIR: str = '.'
DEFAULT_BASE_URL: str = ''
DEFAULT_OUTPUT_FILE: str = 'data.json'

class ConfigError(Exception):
    """Raised when the conversion configuration cannot be built or validated."""

def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """
    Parse command-line arguments for the conversion process.

    Args:
        argv: Argument strings to parse. When None (the default) argparse
            reads ``sys.argv[1:]``, which is what this function always did
            before the parameter existed. Passing an explicit list lets the
            parser be driven programmatically.

    Returns:
        Namespace with the attributes ``basedir``, ``output_file``,
        ``base_url`` and ``publish``
    """
    parser = argparse.ArgumentParser(
        description="Process YAML files and generate a JSON file containing checklist items."
    )
    parser.add_argument(
        '--basedir', '-b',
        type=str,
        default=DEFAULT_BASE_DIR,
        help='Base directory where the data directory is located.'
    )
    parser.add_argument(
        '--output-file', '-o',
        type=str,
        default=DEFAULT_OUTPUT_FILE,
        help='Output file path.'
    )
    parser.add_argument(
        '--base-url', '-u',
        type=str,
        default=DEFAULT_BASE_URL,
        help='Base URL for the links to related information.'
    )
    parser.add_argument(
        '--publish', '-p',
        action='store_true',
        help='Generate for publishing'
    )
    return parser.parse_args(argv)

def process_arguments(args: argparse.Namespace) -> Dict[str, Any]:
    """
    Process the command-line arguments and validate paths.

    The base directory is resolved to an absolute path and must already
    exist. A relative output path is placed under the base directory, an
    absolute one is used as given, and the output file's parent directory
    is created when missing.

    Args:
        args: Parsed command-line arguments providing ``basedir``,
            ``output_file``, ``base_url`` and ``publish``

    Returns:
        Dictionary with the keys ``basedir`` and ``output_file`` (both as
        strings), ``base_url`` and ``publish``

    Raises:
        ConfigError: If the base directory does not exist, or if any other
            error occurs while resolving or creating the paths (the original
            exception is attached as ``__cause__``)
    """
    try:
        basedir = Path(args.basedir).resolve()
        if not basedir.is_dir():
            raise ConfigError(f"Base directory does not exist: {basedir}")

        requested = Path(args.output_file)
        output_file = requested if requested.is_absolute() else basedir / requested

        # Ensure output directory exists
        output_file.parent.mkdir(parents=True, exist_ok=True)

        return {
            'basedir': str(basedir),
            'output_file': str(output_file),
            'base_url': args.base_url,
            'publish': args.publish
        }
    except ConfigError:
        # Already a clear configuration error; re-wrapping it would only
        # prepend a second, redundant prefix to the message.
        raise
    except Exception as e:
        # Callers only handle ConfigError, so every other failure is still
        # converted, but chained so the root cause stays in the traceback.
        raise ConfigError(f"Error processing arguments: {e}") from e

def setup_configuration() -> Dict[str, Any]:
    """
    Build the validated configuration from the command line.

    Parses the command-line arguments and passes the resulting namespace
    straight to process_arguments().

    Returns:
        The settings dictionary produced by process_arguments()

    Raises:
        ConfigError: Propagated from process_arguments() when the
            configuration is invalid
    """
    return process_arguments(parse_args())
101 changes: 27 additions & 74 deletions tools/yaml2x/yaml2json/initializer.py
Original file line number Diff line number Diff line change
@@ -1,85 +1,38 @@
import os
import argparse
import pickle
"""
Legacy compatibility module for YAML to JSON conversion.

LANGUAGES = ['ja', 'en']
PICKLE_PATH = 'build/doctrees/environment.pickle'
This module provides backward compatibility by re-exporting functionality
that has been moved to more specialized modules (config.py and utils.py).
New code should use those modules directly instead.

def setup_parameters():
args = parse_args()
return process_arguments(args)
@deprecated: Use config.py and utils.py modules instead
"""

def parse_args():
    """Build the converter's argument parser and parse the process arguments with it."""
    parser = argparse.ArgumentParser(
        description="Process YAML files and generate a JSON file containing checklist items."
    )
    # (flags, keyword options) for each supported command-line option
    option_specs = (
        (('--basedir', '-b'),
         dict(type=str, default='.', help='Base directory where the data directory is located.')),
        (('--output-file', '-o'),
         dict(type=str, default='data.json', help='Output file path.')),
        (('--base-url', '-u'),
         dict(type=str, default='', help='Base URL for the links to related information.')),
        (('--publish', '-p'),
         dict(action='store_true', help='Generate for publishing')),
    )
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
    return parser.parse_args()
from typing import Dict, Any

def process_arguments(args):
"""
Process the command-line arguments to determine the build mode, target files, and other options.
from . import config, utils

Args:
args: The parsed command-line arguments.
# Re-export constants for backward compatibility
LANGUAGES = utils.LANGUAGES
PICKLE_PATH = utils.PICKLE_PATH

Returns:
A dictionary containing settings derived from the command-line arguments.
"""
basedir = os.path.abspath(args.basedir)
if os.path.isabs(args.output_file):
output_file = args.output_file
elif not os.path.dirname(args.output_file):
output_file = os.path.join(basedir, args.output_file)
else:
output_file = os.path.abspath(args.output_file)
return {
'basedir': basedir,
'output_file': output_file,
'base_url': args.base_url,
'publish': args.publish
}
# Re-export exception for backward compatibility
InitializerError = config.ConfigError

def get_info_links(basedir, baseurl = ''):
def setup_parameters() -> Dict[str, Any]:
"""
Extract the labels from the environment pickle file.

Args:
basedir: The project root directory where the data directory is located for each language.
baseurl: The base URL for the links to related information.

Returns:
A dictionary containing the labels extracted from the environment pickle file.
@deprecated: Use config.setup_configuration() instead
"""
info = {}
path_prefix = {
'ja': '',
'en': 'en/'
}
for lang in LANGUAGES:
pickle_file = os.path.abspath(os.path.join(basedir, lang, PICKLE_PATH))
try:
with open(pickle_file, 'rb') as f:
doctree = pickle.load(f)
except Exception as e:
raise Exception(f'Error loading environment pickle file: {pickle_file}') from e
labels = doctree.domaindata['std']['labels']
for label in labels:
if labels[label][0] == '' or labels[label][1] == '' or labels[label][2] == '':
continue
if label not in info:
info[label] = {
'text': {},
'url': {}
}
info[label]['text'][lang] = labels[label][2]
info[label]['url'][lang] = f'{baseurl}/{path_prefix[lang]}{labels[label][0]}.html#{labels[label][1]}'
return config.setup_configuration()

return info
def get_info_links(basedir: str, baseurl: str = '') -> Dict[str, Any]:
    """
    Backward-compatible shim that forwards to utils.get_info_links().

    @deprecated: Use utils.get_info_links() instead

    Args:
        basedir: Passed through unchanged as the first argument
        baseurl: Passed through unchanged as the second argument

    Returns:
        Whatever utils.get_info_links() returns
    """
    links = utils.get_info_links(basedir, baseurl)
    return links

def version_info(basedir):
    """
    Execute ``version.py`` found in basedir and return the resulting namespace.

    Args:
        basedir: Directory that contains ``version.py``

    Returns:
        The dictionary that served as the global namespace while the file
        was executed, so it holds every name the file defined
    """
    version_path = os.path.join(basedir, 'version.py')
    with open(version_path, encoding='utf-8') as source_file:
        source = source_file.read()
    namespace = {}
    # NOTE(review): exec() runs whatever code version.py contains; this is only
    # acceptable while that file is trusted project content.
    exec(source, namespace)
    return namespace
def version_info(basedir: str) -> Dict[str, str]:
    """
    Backward-compatible shim that forwards to utils.get_version_info().

    @deprecated: Use utils.get_version_info() instead

    Args:
        basedir: Passed through unchanged as the only argument

    Returns:
        Whatever utils.get_version_info() returns
    """
    info = utils.get_version_info(basedir)
    return info
89 changes: 89 additions & 0 deletions tools/yaml2x/yaml2json/rst_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""
RST text processing module.

This module handles the processing of RST markup in text content,
including references, keyboard shortcuts, and text width formatting.
"""

import re
from typing import Dict, Any

# Regular expression patterns
# :ref:`label` where the label consists of lowercase letters, digits and hyphens
RST_REF_PATTERN = re.compile(r':ref:`([-a-z0-9]+)`')
# :kbd:`keys` -- the content stops at the first closing backtick so that several
# roles (or other backtick markup) on the same line are matched separately
# instead of being swallowed into one greedy match.
RST_KBD_PATTERN = re.compile(r':kbd:`([^`]+)`')

def normalize_text(text: str) -> str:
    """
    Normalize whitespace and spacing between characters.

    Leading and trailing whitespace is stripped, then whitespace is removed
    wherever it sits between two full-width characters or between a
    full-width and a half-width character. Whitespace between two half-width
    characters is kept.

    Args:
        text: The text to normalize

    Returns:
        The normalized text
    """
    # Remove leading and trailing whitespaces
    text = text.strip()

    # Define regexp for half and full width chars
    fullwidth_chars = r'[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF]'
    halfwidth_chars = r'[\u0000-\u007F\uFF61-\uFFDC\uFFE8-\uFFEE]'

    # The neighbouring characters are checked with lookbehind/lookahead rather
    # than captured: a captured character is consumed by the match and cannot
    # anchor the next gap, which left every second gap in runs such as
    # "A B C" (single characters separated by spaces) untouched.

    # Remove whitespaces between fullwidth chars
    text = re.sub(rf'(?<={fullwidth_chars})\s+(?={fullwidth_chars})', '', text)

    # Remove whitespaces between halfwidth chars and full width chars
    text = re.sub(rf'(?<={fullwidth_chars})\s+(?={halfwidth_chars})', '', text)
    text = re.sub(rf'(?<={halfwidth_chars})\s+(?={fullwidth_chars})', '', text)

    return text

def process_rst_text(text: str, info: Dict[str, Any], lang: str) -> str:
    """
    Process RST markup text by replacing references and keyboard shortcuts.

    Each ``:ref:`` role is replaced by ``info[label]['text'][lang]`` and each
    ``:kbd:`` role by its bare content. For ``lang == 'ja'`` the result is
    additionally passed through normalize_text().

    Args:
        text: The RST text to process
        info: Dictionary containing reference information, keyed by label
        lang: Language code (e.g. 'en', 'ja')

    Returns:
        Processed text with RST markup replaced
    """
    def ref_replace(match):
        """Replace reference with its text, or keep the markup if unresolved."""
        try:
            return info[match.group(1)]['text'][lang]
        except KeyError:
            # Unknown label, or a label with no text for this language: leave
            # the original markup in place rather than aborting the conversion.
            return match.group(0)

    # Replace references
    text = RST_REF_PATTERN.sub(ref_replace, text)

    # Replace keyboard shortcuts
    text = RST_KBD_PATTERN.sub(lambda m: m.group(1), text)

    # Only normalize spacing for Japanese text
    if lang == 'ja':
        text = normalize_text(text)
    return text

def process_rst_condition(condition: Dict[str, Any], info: Dict[str, Any]) -> Dict[str, Any]:
    """
    Replace RST markup throughout a condition tree.

    The condition is updated in place and the same object is returned.

    Args:
        condition: Condition node; a node whose ``type`` is ``'simple'`` is a
            leaf, any other node holds its children under ``conditions``
        info: Dictionary containing reference information

    Returns:
        The condition that was passed in, with RST markup replaced
    """
    if condition['type'] != 'simple':
        # Compound node: rebuild the child list from the processed children
        children = []
        for child in condition['conditions']:
            children.append(process_rst_condition(child, info))
        condition['conditions'] = children
        return condition

    if 'procedure' in condition:
        # Leaf node: rewrite the per-language procedure texts
        texts = condition['procedure']['procedure']
        for lang in texts:
            texts[lang] = process_rst_text(texts[lang], info, lang)
    return condition
Loading