Merge pull request #279 from ma10/refactor-yaml2json-20250219

ma10 · web-flow · commit 3250f7226f1f · 2025-02-20T13:03:59.000+09:00
yaml2json.pyのリファクタリング
diff --git a/tools/yaml2x/yaml2json/__init__.py b/tools/yaml2x/yaml2json/__init__.py
@@ -0,0 +1,19 @@
+"""
+YAML to JSON converter package for accessibility guidelines.
+
+This package provides functionality to convert YAML-based accessibility
+guidelines into JSON format, handling RST markup and multilingual content.
+"""
+
+from . import config, utils, rst_processor
+from .yaml2json import main
+
+# Names exposed by star-imports of this package.
+__all__ = [
+    "main",           # conversion entry point imported from .yaml2json
+    "config",         # configuration submodule
+    "utils",          # utilities submodule
+    "rst_processor",  # RST markup processing submodule
+]
+
+# Package version string
+__version__ = "0.1.0"
diff --git a/tools/yaml2x/yaml2json/config.py b/tools/yaml2x/yaml2json/config.py
@@ -0,0 +1,102 @@
+"""
+Configuration module for YAML to JSON conversion.
+
+This module handles command-line argument parsing and configuration
+setup for the YAML to JSON conversion process.
+"""
+
+import argparse
+from pathlib import Path
+from typing import Dict, Any
+
+# Fallback values for command-line options that are not supplied
+DEFAULT_OUTPUT_FILE: str = "data.json"  # --output-file / -o
+DEFAULT_BASE_URL: str = ""  # --base-url / -u
+DEFAULT_BASE_DIR: str = "."  # --basedir / -b
+
+class ConfigError(Exception):
+    """Raised when the conversion configuration cannot be validated."""
+
+def parse_args() -> argparse.Namespace:
+    """
+    Build the argument parser and parse the current command line.
+
+    Returns:
+        Namespace with the attributes ``basedir``, ``output_file``,
+        ``base_url`` and ``publish``
+    """
+    # (long flag, short flag, add_argument keyword arguments), in the order
+    # in which the options are registered on the parser.
+    option_specs = (
+        ('--basedir', '-b', {
+            'type': str,
+            'default': DEFAULT_BASE_DIR,
+            'help': 'Base directory where the data directory is located.',
+        }),
+        ('--output-file', '-o', {
+            'type': str,
+            'default': DEFAULT_OUTPUT_FILE,
+            'help': 'Output file path.',
+        }),
+        ('--base-url', '-u', {
+            'type': str,
+            'default': DEFAULT_BASE_URL,
+            'help': 'Base URL for the links to related information.',
+        }),
+        ('--publish', '-p', {
+            'action': 'store_true',
+            'help': 'Generate for publishing',
+        }),
+    )
+
+    parser = argparse.ArgumentParser(
+        description="Process YAML files and generate a JSON file containing checklist items."
+    )
+    for long_flag, short_flag, options in option_specs:
+        parser.add_argument(long_flag, short_flag, **options)
+    return parser.parse_args()
+
+def process_arguments(args: argparse.Namespace) -> Dict[str, Any]:
+    """
+    Validate the parsed arguments and turn them into a settings dictionary.
+
+    The base directory is resolved to an absolute path and must already
+    exist. A relative output path is interpreted relative to the base
+    directory, while an absolute one is used unchanged. The parent directory
+    of the output file is created when it is missing.
+
+    Args:
+        args: Parsed command-line arguments
+
+    Returns:
+        Dictionary with the keys ``basedir``, ``output_file``, ``base_url``
+        and ``publish``; both paths are returned as strings
+
+    Raises:
+        ConfigError: If the base directory does not exist, or if any other
+            error occurs while processing the arguments (the underlying
+            exception is chained as the cause)
+    """
+    try:
+        basedir = Path(args.basedir).resolve()
+        if not basedir.is_dir():
+            raise ConfigError(f"Base directory does not exist: {basedir}")
+
+        output_file = Path(args.output_file)
+        if not output_file.is_absolute():
+            output_file = basedir / output_file
+
+        # Ensure output directory exists
+        output_file.parent.mkdir(parents=True, exist_ok=True)
+
+        return {
+            'basedir': str(basedir),
+            'output_file': str(output_file),
+            'base_url': args.base_url,
+            'publish': args.publish
+        }
+    except ConfigError:
+        # Already a specific configuration error: re-raise it untouched so
+        # its message is not wrapped a second time by the handler below.
+        raise
+    except Exception as e:
+        # Keep the original exception attached for debugging.
+        raise ConfigError(f"Error processing arguments: {e}") from e
+
+def setup_configuration() -> Dict[str, Any]:
+    """
+    Parse the command line and return the validated settings.
+
+    Returns:
+        Settings dictionary produced by process_arguments()
+
+    Raises:
+        ConfigError: If required configuration is invalid
+    """
+    return process_arguments(parse_args())
diff --git a/tools/yaml2x/yaml2json/initializer.py b/tools/yaml2x/yaml2json/initializer.py
@@ -1,85 +1,38 @@
-import os
-import argparse
-import pickle
+"""
+Legacy compatibility module for YAML to JSON conversion.
 
-LANGUAGES = ['ja', 'en']
-PICKLE_PATH = 'build/doctrees/environment.pickle'
+This module provides backward compatibility by re-exporting functionality
+that has been moved to more specialized modules (config.py and utils.py).
+New code should use those modules directly instead.
 
-def setup_parameters():
-    args = parse_args()
-    return process_arguments(args)
+@deprecated: Use config.py and utils.py modules instead
+"""
 
-def parse_args():
-    parser = argparse.ArgumentParser(description="Process YAML files and generate a JSON file containing checklist items.")
-    parser.add_argument('--basedir', '-b', type=str, default='.', help='Base directory where the data directory is located.')
-    parser.add_argument('--output-file', '-o', type=str, default='data.json', help='Output file path.')
-    parser.add_argument('--base-url', '-u', type=str, default='', help='Base URL for the links to related information.')
-    parser.add_argument('--publish', '-p', action='store_true', help='Generate for publishing')
-    return parser.parse_args()
+from typing import Dict, Any
 
-def process_arguments(args):
-    """
-    Process the command-line arguments to determine the build mode, target files, and other options.
+from . import config, utils
 
-    Args:
-        args: The parsed command-line arguments.
+# Re-export constants for backward compatibility
+LANGUAGES = utils.LANGUAGES
+PICKLE_PATH = utils.PICKLE_PATH
 
-    Returns:
-        A dictionary containing settings derived from the command-line arguments.
-    """
-    basedir = os.path.abspath(args.basedir)
-    if os.path.isabs(args.output_file):
-        output_file = args.output_file
-    elif not os.path.dirname(args.output_file):
-        output_file = os.path.join(basedir, args.output_file)
-    else:
-        output_file = os.path.abspath(args.output_file)
-    return {
-        'basedir': basedir,
-        'output_file': output_file,
-        'base_url': args.base_url,
-        'publish': args.publish
-    }
+# Re-export exception for backward compatibility
+InitializerError = config.ConfigError
 
-def get_info_links(basedir, baseurl = ''):
+def setup_parameters() -> Dict[str, Any]:
     """
-    Extract the labels from the environment pickle file.
-
-    Args:
-        basedir: The project root directory where the data directory is located for each language.
-        baseurl: The base URL for the links to related information.
-
-    Returns:
-        A dictionary containing the labels extracted from the environment pickle file.
+    @deprecated: Use config.setup_configuration() instead
     """
-    info = {}
-    path_prefix = {
-        'ja': '',
-        'en': 'en/'
-    }
-    for lang in LANGUAGES:
-        pickle_file = os.path.abspath(os.path.join(basedir, lang, PICKLE_PATH))
-        try:
-            with open(pickle_file, 'rb') as f:
-                doctree = pickle.load(f)
-        except Exception as e:
-            raise Exception(f'Error loading environment pickle file: {pickle_file}') from e
-        labels = doctree.domaindata['std']['labels']
-        for label in labels:
-            if labels[label][0] == '' or labels[label][1] == '' or labels[label][2] == '':
-                continue
-            if label not in info:
-                info[label] = {
-                    'text': {},
-                    'url': {}
-                }
-            info[label]['text'][lang] = labels[label][2]
-            info[label]['url'][lang] = f'{baseurl}/{path_prefix[lang]}{labels[label][0]}.html#{labels[label][1]}'
+    return config.setup_configuration()
 
-    return info
+def get_info_links(basedir: str, baseurl: str = '') -> Dict[str, Any]:
+    """
+    @deprecated: Use utils.get_info_links() instead
+    """
+    return utils.get_info_links(basedir, baseurl)
 
-def version_info(basedir):
-    version_info = {}
-    with open(os.path.join(basedir, 'version.py'), encoding='utf-8') as f:
-        exec(f.read(), version_info)
-    return version_info
+def version_info(basedir: str) -> Dict[str, str]:
+    """
+    @deprecated: Use utils.get_version_info() instead
+    """
+    return utils.get_version_info(basedir)
diff --git a/tools/yaml2x/yaml2json/rst_processor.py b/tools/yaml2x/yaml2json/rst_processor.py
@@ -0,0 +1,89 @@
+"""
+RST text processing module.
+
+This module handles the processing of RST markup in text content,
+including references, keyboard shortcuts, and text width formatting.
+"""
+
+import re
+from typing import Dict, Any
+
+# Regular expression patterns
+# :ref: role whose target is a bare label made of lowercase letters,
+# digits and hyphens; group 1 is the label.
+RST_REF_PATTERN = re.compile(r':ref:`([-a-z0-9]+)`')
+# :kbd: role; group 1 is the key text. The group is non-greedy so that
+# several roles on one line are matched one at a time rather than as a
+# single span running from the first opening to the last closing backtick.
+RST_KBD_PATTERN = re.compile(r':kbd:`(.+?)`')
+
+def normalize_text(text: str) -> str:
+    """
+    Strip the text and remove whitespace that is adjacent to full-width characters.
+
+    Whitespace runs are removed between two full-width characters and
+    between a full-width and a half-width character (in either order).
+    Whitespace between two half-width characters is left untouched.
+
+    Args:
+        text: The text to normalize
+
+    Returns:
+        The normalized text
+    """
+    # Remove leading and trailing whitespaces
+    text = text.strip()
+
+    # Define regexp for half and full width chars
+    fullwidth_chars = r'[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF]'
+    halfwidth_chars = r'[\u0000-\u007F\uFF61-\uFFDC\uFFE8-\uFFEE]'
+
+    # The neighbouring characters are checked with lookbehind/lookahead
+    # instead of being consumed by the match. Consuming them would make
+    # adjacent gaps overlap, so that in a sequence such as "A B C" of
+    # full-width characters only the first gap would be removed.
+
+    # Remove whitespaces between fullwidth chars
+    text = re.sub(rf'(?<={fullwidth_chars})\s+(?={fullwidth_chars})', '', text)
+
+    # Remove whitespaces between halfwidth chars and full width chars
+    text = re.sub(rf'(?<={fullwidth_chars})\s+(?={halfwidth_chars})', '', text)
+    text = re.sub(rf'(?<={halfwidth_chars})\s+(?={fullwidth_chars})', '', text)
+
+    return text
+
+def process_rst_text(text: str, info: Dict[str, Any], lang: str) -> str:
+    """
+    Process RST markup text by replacing references and keyboard shortcuts.
+
+    ``:ref:`` roles are replaced with the referenced text for ``lang`` and
+    ``:kbd:`` roles with their content. For Japanese text the spacing is
+    additionally normalized with normalize_text().
+
+    Args:
+        text: The RST text to process
+        info: Dictionary containing reference information; each entry is
+            read as ``info[ref_id]['text'][lang]``
+        lang: Language code (e.g. 'en', 'ja')
+
+    Returns:
+        Processed text with RST markup replaced
+    """
+    def ref_replace(match):
+        """Return the referenced text, or the original markup if unavailable."""
+        original = match.group(0)
+        ref_id = match.group(1)
+        if ref_id not in info:
+            return original  # Keep original if reference not found
+        # A known reference may still lack text for this language; keep the
+        # markup in that case as well instead of failing with a KeyError.
+        return info[ref_id]['text'].get(lang, original)
+
+    # Replace references
+    text = RST_REF_PATTERN.sub(ref_replace, text)
+
+    # Replace keyboard shortcuts
+    text = RST_KBD_PATTERN.sub(lambda m: m.group(1), text)
+
+    # Only normalize spacing for Japanese text
+    if lang == 'ja':
+        text = normalize_text(text)
+    return text
+
+def process_rst_condition(condition: Dict[str, Any], info: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Replace RST markup in a condition tree, modifying it in place.
+
+    A condition whose ``type`` is ``'simple'`` has each language entry of
+    its procedure text run through process_rst_text(), if it has a
+    procedure. Any other condition has the entries of its ``conditions``
+    list processed recursively.
+
+    Args:
+        condition: Dictionary containing condition data
+        info: Dictionary containing reference information
+
+    Returns:
+        The same condition dictionary with RST markup replaced
+    """
+    # Compound condition: descend into every child and store the results.
+    if condition['type'] != 'simple':
+        condition['conditions'] = [
+            process_rst_condition(child, info)
+            for child in condition['conditions']
+        ]
+        return condition
+
+    # Simple condition: rewrite the per-language procedure texts, if present.
+    if 'procedure' in condition:
+        texts = condition['procedure']['procedure']
+        for lang, raw_text in texts.items():
+            texts[lang] = process_rst_text(raw_text, info, lang)
+    return condition
diff --git a/tools/yaml2x/yaml2json/utils.py b/tools/yaml2x/yaml2json/utils.py
diff --git a/tools/yaml2x/yaml2json/yaml2json.py b/tools/yaml2x/yaml2json/yaml2json.py