diff --git a/.gitignore b/.gitignore index ef2ea62..19ecd38 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ logs/ /mgrep /mgit /demo +/yu # Local notes (not tracked) local/ \ No newline at end of file diff --git a/docs/argmojo_overall_planning.md b/docs/argmojo_overall_planning.md index 563c4d5..e50924b 100644 --- a/docs/argmojo_overall_planning.md +++ b/docs/argmojo_overall_planning.md @@ -78,7 +78,7 @@ These features appear across multiple libraries and depend only on string operat | Mutual implication (`implies`) | — | — | — | — | ArgMojo unique feature | **Done** | | Stdin value (`-` convention) | — | — | ✓ | — | Unix convention | Phase 5 | | Shell completion script generation | — | ✓ | ✓ | ✓ | bash / zsh / fish | **Done** | -| CJK-aware help formatting | — | — | — | — | I need it personally | Phase 6 | +| CJK-aware help formatting | — | — | — | — | I need it personally | **Done** | | CJK full-to-half-width correction | — | — | — | — | I need it personally | Phase 6 | | CJK punctuation detection | — | — | — | — | I need it personally | Phase 6 | | Typed retrieval (`get_int()` etc.) | ✓ | ✓ | ✓ | ✓ | | **Done** | @@ -141,24 +141,30 @@ This gives us the raw list of argument strings, and the remaining task is to imp ```txt src/argmojo/ -├── __init__.mojo # Package exports (Argument, Command, ParseResult) -├── argument.mojo # Argument struct — argument definition with builder pattern -├── command.mojo # Command struct — command definition & parsing -└── parse_result.mojo # ParseResult struct — parsed values -tests/ -├── test_parse.mojo # Core parsing tests (flags, values, shorts, etc.) -├── test_groups.mojo # Group constraint tests (exclusive, conditional, etc.) 
-├── test_collect.mojo # Collection feature tests (append, delimiter, number_of_values) -├── test_help.mojo # Help output tests (formatting, colours, alignment) -├── test_extras.mojo # Range, map, alias, deprecated tests -├── test_subcommands.mojo # Subcommand tests (dispatch, help sub, unknown sub, etc.) -├── test_negative_numbers.mojo # Negative number passthrough tests -├── test_persistent.mojo # Persistent (global) flag tests +├── __init__.mojo # Package exports (Argument, Command, ParseResult) +├── argument.mojo # Argument struct — argument definition with builder pattern +├── command.mojo # Command struct — command definition & parsing +├── parse_result.mojo # ParseResult struct — parsed values +└── utils.mojo # Internal utilities — ANSI colours, display helpers +tests/ # 424 tests across 14 files +├── test_parse.mojo # Core parsing tests (flags, values, shorts, etc.) +├── test_groups.mojo # Group constraint tests (exclusive, conditional, etc.) +├── test_collect.mojo # Collection feature tests (append, delimiter, number_of_values) +├── test_help.mojo # Help output tests (formatting, colours, alignment) +├── test_extras.mojo # Range, map, alias, deprecated tests +├── test_subcommands.mojo # Subcommand tests (dispatch, help sub, unknown sub, etc.) 
+├── test_negative_numbers.mojo # Negative number passthrough tests +├── test_persistent.mojo # Persistent (global) flag tests +├── test_typo_suggestions.mojo # Levenshtein typo suggestion tests +├── test_completion.mojo # Shell completion script generation tests +├── test_implies.mojo # Mutual implication and cycle detection tests ├── test_const_require_equals.mojo # default_if_no_value and require_equals tests -└── test_response_file.mojo # response file (@args.txt) expansion tests +├── test_response_file.mojo # response file (@args.txt) expansion tests +└── test_remainder_known.mojo # remainder, parse_known_arguments, allow_hyphen_values tests examples/ -├── mgrep.mojo # grep-like CLI example (no subcommands) -└── mgit.mojo # git-like CLI example (with subcommands) +├── demo.mojo # comprehensive showcase of all ArgMojo features +├── mgrep.mojo # grep-like CLI example (no subcommands) +└── mgit.mojo # git-like CLI example (with subcommands) ``` ### 4.2 What's Already Done ✓ @@ -205,7 +211,20 @@ examples/ | Range clamping (`.range[1, 100]().clamp()` → adjust + warn instead of error) | ✓ | ✓ | | Default-if-no-value (`.default_if_no_value("gzip")` → optional value with fallback) | ✓ | ✓ | | Require equals syntax (`.require_equals()` → `--key=value` only) | ✓ | ✓ | -| Response file (`command.response_file_prefix()` → `@args.txt` expands file contents) | ✓ | ✓ | +| Response file (`command.response_file_prefix()` → `@args.txt` expands file contents) | ✓ ⚠ | ✓ | +| Typo suggestions (Levenshtein "did you mean ...?" 
for long options and subcommands) | ✓ | ✓ | +| Flag counter ceiling (`.count().max[3]()` → cap with warning) | ✓ | ✓ | +| Shell completion script generation (`generate_completion("bash"\|"zsh"\|"fish")`) | ✓ | ✓ | +| Subcommand aliases (`command_aliases(["co"])`) | ✓ | ✓ | +| Hidden subcommands (`sub.hidden()` → excluded from help, completions, errors) | ✓ | ✓ | +| `NO_COLOR` env variable (suppress ANSI output when set) | ✓ | ✓ | +| Mutual implication (`command.implies("debug", "verbose")` with chained + cycle detection) | ✓ | ✓ | +| Remainder positional (`.remainder()` → consume all remaining tokens) | ✓ | ✓ | +| Partial parsing (`parse_known_arguments()` → collect unknown options) | ✓ | ✓ | +| Allow hyphen values (`.allow_hyphen_values()` → accept `-x` as positional value) | ✓ | ✓ | +| Value name rename (`.metavar()` → `.value_name()`) | ✓ | ✓ | + +> ⚠ Response file support is temporarily disabled due to a Mojo compiler deadlock under `-D ASSERT=all`. The implementation is preserved and will be re-enabled when the compiler bug is fixed. ### 4.3 API Design (Current) @@ -238,6 +257,9 @@ fn main() raises: --flag # Boolean flag --key value # Key-value (space separated) --key=value # Key-value (equals separated) +--key=value # Require-equals syntax (when .require_equals()) +--key # Default-if-no-value (when .default_if_no_value()) +--no-flag # Negation (when .negatable()) --verb # Prefix match → --verbose (if unambiguous) # Short options @@ -253,8 +275,15 @@ pattern # By order of add_argument() calls # Special -- # Stop parsing options; rest becomes positional ---help / -h # Show auto-generated help +--help / -h / -? # Show auto-generated help --version / -V # Show version +@args.txt # Response file expansion (when enabled) +cmd rest... 
# Remainder positional (consume all remaining tokens) + +# Subcommands +app search pattern # Dispatch to subcommand +app help search # Show subcommand help +app --verbose search # Persistent flags before subcommand ``` ### 4.5 Validation & Help Behavior Matrix @@ -384,8 +413,8 @@ Subcommands (`app [args]`) are the first feature that turns ArgMojo #### Architecture: composition inside `Command` -- **No file split.** Core logic stays in `command.mojo`. Mojo has no partial structs, so splitting would force free functions + parameter threading for little gain at ~2250 lines. ANSI colour constants and small utility functions live in `utils.mojo` (internal-only, all symbols `_`-prefixed). -- **No tokenizer.** The single-pass cursor walk (`startswith` checks) is sufficient. Token types are trivially identified inline. The parsing logic in `parse_arguments()` delegates to four sub-methods (`_parse_long_option`, `_parse_short_single`, `_parse_short_merged`, `_dispatch_subcommand`) for readability, but the overall flow is still a simple cursor walk. +- **No file split.** Core logic stays in `command.mojo`. Mojo has no partial structs, so splitting would force free functions + parameter threading for little gain. ANSI colour constants and small utility functions live in `utils.mojo` (internal-only, all symbols `_`-prefixed). +- **No tokenizer.** Mojo standard library provides `sys.argv()` which already gives us a pre-split list of argument strings. We can work with this directly in `parse_arguments()` without a separate tokenization step. - **Composition-based.** `Command` gains a child command list. When `parse_arguments()` hits a non-option token matching a registered subcommand, it delegates the remaining argv slice to the child's own `parse_arguments()`. 100% logic reuse, zero duplication. 
#### Pre-requisite refactor (Step 0) @@ -504,7 +533,9 @@ if result.subcommand == "search": - [x] Update user manual with subcommand usage patterns - [x] Document persistent flag behavior and conflict rules -### Phase 5: Polish (nice-to-have features, most of which will be in v0.3, some may be deferred to v0.4+) +### Phase 5: Polish (v0.3 shipped; remaining features for v0.4+) + +Some features shipped in v0.3.0, others completed in the unreleased update branch. Remaining items may be deferred to v0.4+. #### Pre-requisite refactor @@ -533,22 +564,23 @@ Before adding Phase 5 features, further decompose `parse_arguments()` for readab - [x] **Shell completion script generation** — `generate_completion("bash"|"zsh"|"fish")` returns a complete completion script; static approach (no runtime hook), covers options/flags/choices/subcommands (clap `generate`, cobra `completion`, click `shell_complete`) - [ ] **Argument groups in help** — group related options under headings (argparse add_argument_group) - [ ] **Usage line customisation** — two approaches: (1) manual override via `.usage("...")` for git-style hand-written usage strings (e.g. `[-v | --version] [-h | --help] [-C ] ...`); (2) auto-expanded mode that enumerates every flag inline like argparse (good for small CLIs, noisy for large ones). Current default `[OPTIONS]` / `` is the cobra/clap/click convention and is the right default. 
-- [ ] **Partial parsing** — parse known args only, return unknown args as-is (argparse `parse_known_args`) -- [ ] **Require equals syntax** — force `--key=value`, disallow `--key value` (clap `require_equals`) -- [ ] **Default-if-no-value** — `--opt` (no value) → use default-if-no-value; `--opt=val` → use val; absent → use default (argparse `const`) -- [x] **Response file** — `mytool @args.txt` expands file contents as arguments (argparse `fromfile_prefix_chars`, javac, MSBuild) +- [x] **Partial parsing** — `parse_known_arguments()` collects unrecognised options instead of erroring; access via `result.get_unknown_args()` (argparse `parse_known_args`) (PR #13) +- [x] **Require equals syntax** — `.require_equals()` forces `--key=value`, disallows `--key value` (clap `require_equals`) (PR #12) +- [x] **Default-if-no-value** — `.default_if_no_value("val")`: `--opt` uses fallback; `--opt=val` uses val; absent uses default (argparse `const`) (PR #12) +- [x] **Response file** — `mytool @args.txt` expands file contents as arguments (argparse `fromfile_prefix_chars`, javac, MSBuild) (PR #12) ⚠ *Temporarily disabled — Mojo compiler deadlock under `-D ASSERT=all`* - [ ] **Argument parents** — share a common set of Argument definitions across multiple Commands (argparse `parents`) - [ ] **Interactive prompting** — prompt user for missing required args instead of erroring (Click `prompt=True`) - [ ] **Password / masked input** — hide typed characters for sensitive values (Click `hide_input=True`) - [ ] **Confirmation option** — built-in `--yes` / `-y` to skip confirmation prompts (Click `confirmation_option`) - [ ] **Pre/Post run hooks** — callbacks before/after main logic (cobra `PreRun`/`PostRun`) -- [ ] **REMAINDER number_of_values** — capture all remaining args including `-` prefixed ones (argparse `nargs=REMAINDER`) +- [x] **Remainder positional** — `.remainder()` consumes ALL remaining tokens (including `-` prefixed); at most one per command, must be last positional 
(argparse `nargs=REMAINDER`, clap `trailing_var_arg`) (PR #13) +- [x] **Allow hyphen values** — `.allow_hyphen_values()` on positional accepts dash-prefixed tokens as values without `--`; remainder enables this automatically (clap `allow_hyphen_values`) (PR #13) - [ ] **Regex validation** — `.pattern(r"^\d{4}-\d{2}-\d{2}$")` validates value format (no major library has this) - [x] **Mutual implication** — `command.implies("debug", "verbose")` — after parsing, if the trigger flag is set, automatically set the implied flag; support chained implication (`debug → verbose → log`); detect circular cycles at registration time (no major library has this built-in) - [ ] **Stdin value** — `.stdin_value()` on `Argument` — when parsed value is `"-"`, read from stdin; Unix convention (`cat file.txt | mytool --input -`) (cobra supports; depends on Mojo stdin API) - [x] **Subcommand aliases** — `sub.command_aliases(["co"])` registers shorthand names; typo suggestions and completions search aliases too (cobra `Command.Aliases`, clap `Command::alias`) -- [ ] **Hidden subcommands** — `sub.hidden()` — exclude from the "Commands:" section in help, still dispatchable by exact name (clap `Command::hide`, cobra `Hidden`) -- [ ] **`NO_COLOR` env variable** — honour the [no-color.org](https://no-color.org/) standard: if env `NO_COLOR` is set, suppress all ANSI colour output; lower priority than explicit `.color(False)` API call +- [x] **Hidden subcommands** — `sub.hidden()` — exclude from the "Commands:" section in help, completions, and error messages; dispatchable by exact name or alias (clap `Command::hide`, cobra `Hidden`) (PR #9) +- [x] **`NO_COLOR` env variable** — honour the [no-color.org](https://no-color.org/) standard: if env `NO_COLOR` is set (any value, including empty), suppress all ANSI colour output; lower priority than explicit `.color(False)` API call (PR #9) #### Explicitly Out of Scope in This Phase @@ -566,7 +598,7 @@ ArgMojo's differentiating features — no other CLI 
library addresses CJK-specif 這部分主要是為了讓 ArgMojo 在 CJK 環境下的使用體驗更好,解決一些常見的問題,比如幫助信息對齊、全角字符自動轉半角、CJK 標點檢測等。畢竟我總是忘了切換輸入法,打出中文的全角標點,然後被 CLI 報錯。 -#### 6.1 CJK-aware help formatting +#### 6.1 CJK-aware help formatting ✓ **Problem:** All Western CLI libraries (argparse, cobra, clap) assume 1 char = 1 column. CJK characters occupy 2 terminal columns (full-width), causing misaligned `--help` output when descriptions mix CJK and ASCII: @@ -577,12 +609,11 @@ ArgMojo's differentiating features — no other CLI library addresses CJK-specif **Implementation:** -- [ ] Implement `_display_width(s: String) -> Int` in `utils.mojo`, traversing each code point: - - CJK Unified Ideographs (`U+4E00`–`U+9FFF`), CJK Ext-A/B/C/D/E/F/G/H/I/J, fullwidth forms (`U+FF01`–`U+FF60`) → width 2 - - Other visible characters → width 1 - - Zero-width joiners, combining marks → width 0 -- [ ] Replace `len()` with `_display_width()` in all help formatting padding calculations (`_help_positionals_section`, `_help_options_section`, `_help_commands_section`) -- [ ] Add tests with mixed CJK/ASCII help text verifying column alignment +- [x] Implement `_display_width(s: String) -> Int` in `utils.mojo`, traversing each code point: + - CJK Unified Ideographs, CJK Ext-A/B/C/D/E/F/G/H/I/J, fullwidth forms → width 2 + - Other visible characters → width 1 (zero-width joiners and combining marks are rare in CLI help text and are not special-cased) +- [x] Replace `len()` with `_display_width()` in all help formatting padding calculations (`_help_positionals_section`, `_help_options_section`, `_help_commands_section`) +- [x] Add tests with mixed CJK/ASCII help text verifying column alignment **References:** POSIX `wcwidth(3)`, Python `unicodedata.east_asian_width()`, Rust `unicode-width` crate. diff --git a/docs/changelog.md b/docs/changelog.md index fb33237..dd08830 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -17,6 +17,7 @@ Comment out unreleased changes here. This file will be edited just before each r 5. 
Add `.remainder()` builder method on `Argument`. A remainder positional consumes **all** remaining tokens (including ones starting with `-`), similar to argparse `nargs=REMAINDER` or clap `trailing_var_arg`. At most one remainder positional is allowed per command and it must be the last positional (PR #13). 6. Add `parse_known_arguments()` method on `Command`. Like `parse_arguments()`, but unrecognised options are collected into the result instead of raising an error. Access them via `result.get_unknown_args()`. Useful for forwarding unknown flags to another program (PR #13). 7. Add `.allow_hyphen_values()` builder method on `Argument`. When set on a positional, values starting with `-` are accepted without requiring `--` (e.g., `-` for stdin). Remainder positionals have this enabled automatically (PR #13). +8. **CJK-aware help alignment.** Help output now computes column padding using terminal display width instead of byte length. CJK ideographs and fullwidth characters are correctly treated as 2-column-wide, so help descriptions stay aligned when option names, positional names, or subcommand names contain Chinese, Japanese, or Korean characters. ANSI escape sequences are skipped during width calculation. No API changes — this is automatic (PR #14). 
### 🔧 Fixes and API changes diff --git a/docs/user_manual.md b/docs/user_manual.md index df4daaf..24fdeb5 100644 --- a/docs/user_manual.md +++ b/docs/user_manual.md @@ -64,6 +64,7 @@ from argmojo import Argument, Command - [Auto-generated Help](#auto-generated-help) - [Custom Tips](#custom-tips) - [Version Display](#version-display) + - [CJK-Aware Help Alignment](#cjk-aware-help-alignment) - [Parsing Behaviour](#parsing-behaviour) - [Negative Number Passthrough](#negative-number-passthrough) - [Long Option Prefix Matching](#long-option-prefix-matching) @@ -2207,9 +2208,7 @@ Options: -V, --version Show version ``` -Help text columns are **dynamically aligned**: the padding between the option -names and the description text adjusts automatically based on the longest -option line, so everything stays neatly aligned regardless of option length. +Help text columns are **dynamically aligned**: the padding between the option names and the description text adjusts automatically based on the longest option line, so everything stays neatly aligned regardless of option length. --- @@ -2236,9 +2235,7 @@ var help_plain = command._generate_help(color=False) # no ANSI codes **Custom Colours** -The **header colour**, **argument-name colour**, **deprecation warning -colour**, and **parse error colour** are all customisable. Section headers -always keep the **bold + underline** style; only the colour changes. +The **header colour**, **argument-name colour**, **deprecation warning colour**, and **parse error colour** are all customisable. Section headers always keep the **bold + underline** style; only the colour changes. ```mojo var command = Command("myapp", "My app") @@ -2264,9 +2261,7 @@ Available colour names (case-insensitive): An unrecognised colour name raises an `Error` at runtime. -Padding calculation is always based on the **plain-text width** (without -escape codes), so columns remain correctly aligned regardless of whether -colour is enabled. 
+Padding calculation is always based on the **plain-text width** (without escape codes), so columns remain correctly aligned regardless of whether colour is enabled. **What controls the output:** @@ -2302,8 +2297,7 @@ This takes priority over the `color=True` default but does **not** override an e **Show Help When No Arguments Provided** -Use `help_on_no_arguments()` to automatically display help when the user invokes -the command with no arguments (like `git`, `docker`, or `cargo`): +Use `help_on_no_arguments()` to automatically display help when the user invokes the command with no arguments (like `git`, `docker`, or `cargo`): ```mojo var command = Command("myapp", "My application") @@ -2317,14 +2311,11 @@ myapp # prints help and exits myapp --file x # normal parsing ``` -This is particularly useful for commands that require arguments — instead of -showing an obscure "missing required argument" error, the user sees the -full help text. +This is particularly useful for commands that require arguments — instead of showing an obscure "missing required argument" error, the user sees the full help text. ### Custom Tips -Add custom **tip lines** to the bottom of your help output with `add_tip()`. -This is useful for documenting common patterns, gotchas, or examples. +Add custom **tip lines** to the bottom of your help output with `add_tip()`. This is useful for documenting common patterns, gotchas, or examples. ```mojo var command = Command("calc", "A calculator") @@ -2352,10 +2343,7 @@ Tip: Use quotes if you use spaces in expressions. --- -**Smart default tip** — when positional arguments are defined, ArgMojo automatically adds a -built-in tip explaining the `--` separator. The example in this default tip adapts -based on whether negative numbers are auto-detected: if they are, it uses -`-my-value`; otherwise, it uses `-10.18`. +**Smart default tip** — when positional arguments are defined, ArgMojo automatically adds a built-in tip explaining the `--` separator. 
The example in this default tip adapts based on whether negative numbers are auto-detected: if they are, it uses `-my-value`; otherwise, it uses `-10.18`. User-defined tips appear **below** the built-in tip. @@ -2386,6 +2374,50 @@ var command = Command("myapp", "Description", version="1.0.0") After printing the version, the program exits cleanly with exit code 0. +### CJK-Aware Help Alignment + +ArgMojo automatically handles CJK (Chinese, Japanese, Korean) characters in help output. CJK ideographs and fullwidth characters occupy **two terminal columns** instead of one, so naïve byte- or codepoint-based padding would cause misaligned help columns. + +ArgMojo's help formatter uses **display width** (East Asian Width) to compute padding, so help descriptions stay aligned even when option names, positional names, subcommand names, or help text contain CJK characters. + +See the [Unicode East Asian Width specification](https://www.unicode.org/reports/tr11/) for details on CJK character ranges and properties. + +**Example — mixed ASCII and CJK options:** + +```mojo +var command = Command("工具", "一個命令行工具") +command.add_argument( + Argument("output", help="Output path").long("output").short("o") +) +command.add_argument( + Argument("編碼", help="設定編碼").long("編碼") +) +``` + +```txt +Options: + -o, --output Output path + --編碼 <編碼> 設定編碼 +``` + +**Example — CJK subcommands:** + +```mojo +var app = Command("工具", "一個命令行工具") +var init_cmd = Command("初始化", "建立新項目") +app.add_subcommand(init_cmd^) +var build_cmd = Command("構建", "編譯項目") +app.add_subcommand(build_cmd^) +``` + +```txt +Commands: + 初始化 建立新項目 + 構建 編譯項目 +``` + +No configuration is needed — CJK-aware alignment is always active. + ## Parsing Behaviour ### Negative Number Passthrough diff --git a/examples/yu.mojo b/examples/yu.mojo new file mode 100644 index 0000000..f78028a --- /dev/null +++ b/examples/yu.mojo @@ -0,0 +1,212 @@ +"""Example: Yuhao Input Method character code lookup. 
+ +例:宇浩輸入法單字編碼查詢 + +A CJK-heavy demo that showcases ArgMojo's CJK-aware help alignment. +The purpose of the app is to look up the encoding of Chinese characters in the +Yuhao Input Method (宇浩輸入法). + +In Yuhao Input Method, each Chinese character is represented by a 4-letter code +based on its components and radicals. For example, the character "字" is encoded +as "khvi" in the Lingming variant. + +Yuhao Input Method has several variants. The app supports looking up any variant +individually or all three side by side. + +For full character tables, see https://shurufa.app + +This demo app supports three Yuhao IME variants: + - 宇浩靈明 — default (used when no variant flag is given) + - 宇浩卿雲 (--joy) + - 宇浩星陳 (--star) + +Try these (build first with: `pixi run build`): + + ./yu --help + ./yu 字 + ./yu 宇浩靈明 + ./yu --joy 字根 + ./yu --star 你好 + ./yu --all 宇浩 + ./yu --version +""" + +from argmojo import Argument, Command + + +fn _build_ling_table() -> Dict[String, String]: + """Build 宇浩靈明 lookup table (22 high-frequency characters).""" + var d: Dict[String, String] = { + "的": "d", + "一": "fi", + "是": "i", + "不": "u", + "了": "a", + "人": "ne", + "我": "o", + "在": "mvu", + "有": "me", + "他": "jse", + "這": "rwo", + "個": "ju", + "上": "ka", + "來": "rla", + "到": "kva", + "大": "yda", + "中": "di", + "字": "khvi", + "宇": "kfjo", + "浩": "vmdo", + "你": "ja", + "好": "fhi", + } + return d^ + + +fn _build_joy_table() -> Dict[String, String]: + """Build 宇浩卿雲 lookup table (22 high-frequency characters).""" + var d: Dict[String, String] = { + "的": "d", + "一": "f", + "是": "j", + "不": "n", + "了": "l", + "人": "ur", + "我": "w", + "在": "xl", + "有": "x", + "他": "e", + "這": "ruc", + "個": "ebog", + "上": "o", + "來": "cl", + "到": "uo", + "大": "md", + "中": "k", + "字": "il", + "宇": "ife", + "浩": "npk", + "你": "eo", + "好": "wlz", + } + return d^ + + +fn _build_star_table() -> Dict[String, String]: + """Build 宇浩星陳 lookup table (22 high-frequency characters).""" + var d: Dict[String, String] = { + "的": "d", + "一":
"f", + "是": "j", + "不": "v", + "了": "k", + "人": "r", + "我": "g", + "在": "eu", + "有": "ew", + "他": "eo", + "這": "bocy", + "個": "ewj", + "上": "jv", + "來": "all", + "到": "dm", + "大": "o", + "中": "l", + "字": "ikz", + "宇": "ifk", + "浩": "npl", + "你": "e", + "好": "c", + } + return d^ + + +fn _lookup(table: Dict[String, String], ch: String) raises -> String: + if ch in table: + return table[ch] + return "(未收錄)" + + +fn main() raises: + var app = Command( + "yu", + "宇浩輸入法單字編碼查詢。完整碼表請見 https://shurufa.app", + version="0.1.0", + ) + + app.add_argument( + Argument("漢字", help="要查詢的漢字(可以輸入多個漢字)").positional().required() + ) + app.add_argument( + Argument("joy", help="使用卿雲編碼(預設為靈明)").long("joy").short("j").flag() + ) + app.add_argument( + Argument("star", help="使用星陳編碼(預設為靈明)").long("star").short("s").flag() + ) + app.add_argument( + Argument("all", help="同時顯示靈明、卿雲、星陳編碼").long("all").short("a").flag() + ) + + app.add_tip("完整碼表與教程請訪問 https://shurufa.app") + + var args = app.parse() + var input = args.get_string("漢字") + var use_joy = args.get_flag("joy") + var use_star = args.get_flag("star") + var show_all = args.get_flag("all") + + var ling = _build_ling_table() + var joy = _build_joy_table() + var star = _build_star_table() + + # Extract individual codepoints from the UTF-8 input string. 
+ var chars = List[String]() + var bytes = input.as_bytes() + var i = 0 + var n = len(bytes) + while i < n: + var b0 = Int(bytes[i]) + var seq_len: Int + if b0 < 0x80: + seq_len = 1 + elif b0 < 0xE0: + seq_len = 2 + elif b0 < 0xF0: + seq_len = 3 + else: + seq_len = 4 + chars.append(String(input[i : i + seq_len])) + i += seq_len + + if show_all: + print("漢字\t靈明\t卿雲\t星陳") + print("────\t────\t────\t────") + for k in range(len(chars)): + var ch = chars[k] + print( + ch + + "\t" + + _lookup(ling, ch) + + "\t" + + _lookup(joy, ch) + + "\t" + + _lookup(star, ch) + ) + elif use_star: + print("漢字\t星陳編碼") + print("────\t────────") + for k in range(len(chars)): + var ch = chars[k] + print(ch + "\t" + _lookup(star, ch)) + elif use_joy: + print("漢字\t卿雲編碼") + print("────\t────────") + for k in range(len(chars)): + var ch = chars[k] + print(ch + "\t" + _lookup(joy, ch)) + else: + print("漢字\t靈明編碼") + print("────\t────────") + for k in range(len(chars)): + var ch = chars[k] + print(ch + "\t" + _lookup(ling, ch)) diff --git a/pixi.toml b/pixi.toml index d12f156..070c081 100644 --- a/pixi.toml +++ b/pixi.toml @@ -48,7 +48,9 @@ test = """\ build = """pixi run package \ && mojo build -I src examples/mgrep.mojo -o mgrep \ && mojo build -I src examples/mgit.mojo -o mgit \ -&& mojo build -I src examples/demo.mojo -o demo""" +&& mojo build -I src examples/demo.mojo -o demo \ +&& mojo build -I src examples/yu.mojo -o yu \ +""" # clean build artifacts -clean = "rm -f argmojo.mojopkg mgrep mgit demo" +clean = "rm -f argmojo.mojopkg mgrep mgit demo yu" diff --git a/src/argmojo/command.mojo b/src/argmojo/command.mojo index 12e0beb..6e508a5 100644 --- a/src/argmojo/command.mojo +++ b/src/argmojo/command.mojo @@ -13,6 +13,7 @@ from .utils import ( _DEFAULT_ARG_COLOR, _DEFAULT_WARN_COLOR, _DEFAULT_ERROR_COLOR, + _display_width, _looks_like_number, _resolve_color, _suggest_similar, @@ -2986,8 +2987,9 @@ struct Command(Copyable, Movable, Stringable, Writable): var pos_max: Int = 0 for k in 
range(len(pos_plains)): - if len(pos_plains[k]) > pos_max: - pos_max = len(pos_plains[k]) + var w = _display_width(pos_plains[k]) + if w > pos_max: + pos_max = w var pos_pad = pos_max + 4 var s = header_color + "Arguments:" + reset_code + "\n" @@ -2995,7 +2997,7 @@ struct Command(Copyable, Movable, Stringable, Writable): var line = pos_colors[k] if pos_helps[k]: # Pad based on plain-text width. - var padding = pos_pad - len(pos_plains[k]) + var padding = pos_pad - _display_width(pos_plains[k]) for _p in range(padding): line += " " line += pos_helps[k] @@ -3226,12 +3228,13 @@ struct Command(Copyable, Movable, Stringable, Writable): var local_max: Int = 0 var global_max: Int = 0 for k in range(len(opt_plains)): + var w = _display_width(opt_plains[k]) if opt_persistent[k]: - if len(opt_plains[k]) > global_max: - global_max = len(opt_plains[k]) + if w > global_max: + global_max = w else: - if len(opt_plains[k]) > local_max: - local_max = len(opt_plains[k]) + if w > local_max: + local_max = w var local_pad = local_max + 4 var global_pad = global_max + 4 @@ -3242,7 +3245,7 @@ struct Command(Copyable, Movable, Stringable, Writable): if not opt_persistent[k]: var line = opt_colors[k] if opt_helps[k]: - var padding = local_pad - len(opt_plains[k]) + var padding = local_pad - _display_width(opt_plains[k]) for _p in range(padding): line += " " line += opt_helps[k] @@ -3255,7 +3258,7 @@ struct Command(Copyable, Movable, Stringable, Writable): if opt_persistent[k]: var line = opt_colors[k] if opt_helps[k]: - var padding = global_pad - len(opt_plains[k]) + var padding = global_pad - _display_width(opt_plains[k]) for _p in range(padding): line += " " line += opt_helps[k] @@ -3324,14 +3327,15 @@ struct Command(Copyable, Movable, Stringable, Writable): # Compute padding. 
var cmd_max: Int = 0 for k in range(len(cmd_plains)): - if len(cmd_plains[k]) > cmd_max: - cmd_max = len(cmd_plains[k]) + var w = _display_width(cmd_plains[k]) + if w > cmd_max: + cmd_max = w var cmd_pad = cmd_max + 4 var s = "\n" + header_color + "Commands:" + reset_code + "\n" for k in range(len(cmd_plains)): var line = cmd_colors[k] if cmd_helps[k]: - var padding = cmd_pad - len(cmd_plains[k]) + var padding = cmd_pad - _display_width(cmd_plains[k]) for _p in range(padding): line += " " line += cmd_helps[k]
diff --git a/src/argmojo/utils.mojo b/src/argmojo/utils.mojo
index 6882311..4a2d39a 100644
--- a/src/argmojo/utils.mojo
+++ b/src/argmojo/utils.mojo
@@ -24,6 +24,246 @@ comptime _DEFAULT_ERROR_COLOR = _RED
 # ── Utility functions ────────────────────────────────────────────────────────


+fn _is_wide_codepoint(codepoint: Int) -> Bool:
+    """Returns True if the Unicode codepoint occupies two terminal columns.
+
+    Covers CJK Unified Ideographs, CJK Compatibility Ideographs,
+    CJK Extension blocks (A-J), Fullwidth Forms, and a selection of
+    other commonly wide ranges (Hangul Syllables, CJK Symbols, etc.).
+
+    Args:
+        codepoint: The Unicode codepoint to classify.
+
+    Returns:
+        True if the codepoint falls in one of the wide ranges below.
+    """
+
+    # Range table based on the Unicode East Asian Width classes W and F.
+    # Adjacent blocks are merged to reduce the number of branches.
+    #
+    # ── Merged range [0x2E80, 0x9FFF] ────────────────────────────────
+    #   [0x2E80, 0x2EFF]  # CJK Radicals Supplement
+    #   [0x2F00, 0x2FDF]  # Kangxi Radicals
+    #   [0x2FF0, 0x2FFF]  # Ideographic Description Characters
+    #   [0x3000, 0x303F]  # CJK Symbols and Punctuation
+    #   [0x3040, 0x309F]  # Hiragana
+    #   [0x30A0, 0x30FF]  # Katakana
+    #   [0x3100, 0x312F]  # Bopomofo
+    #   [0x3130, 0x318F]  # Hangul Compatibility Jamo
+    #   [0x3190, 0x319F]  # Kanbun
+    #   [0x31A0, 0x31BF]  # Bopomofo Extended
+    #   [0x31C0, 0x31EF]  # CJK Strokes
+    #   [0x31F0, 0x31FF]  # Katakana Phonetic Extensions
+    #   [0x3200, 0x32FF]  # Enclosed CJK Letters and Months
+    #   [0x3300, 0x33FF]  # CJK Compatibility
+    #   [0x3400, 0x4DBF]  # CJK Unified Ideographs Extension A
+    #   [0x4DC0, 0x4DFF]  # Yijing Hexagram Symbols
+    #   [0x4E00, 0x9FFF]  # CJK Unified Ideographs
+    #
+    # ── Merged range [0xE000, 0xFAFF] ────────────────────────────────
+    #   [0xE000, 0xF8FF]  # Private Use Area (Yuhao components live here; EAW=A)
+    #   [0xF900, 0xFAFF]  # CJK Compatibility Ideographs
+    #
+    # ── Merged range [0x16FE0, 0x18D7F] ──────────────────────────────
+    #   [0x16FE0, 0x16FFF]  # Ideographic Symbols and Punctuation
+    #   [0x17000, 0x187FF]  # Tangut
+    #   [0x18800, 0x18AFF]  # Tangut Components
+    #   [0x18B00, 0x18CFF]  # Khitan Small Script
+    #   [0x18D00, 0x18D7F]  # Tangut Supplement
+    #
+    # ── Merged range [0x1B000, 0x1B16F] ──────────────────────────────
+    #   [0x1B000, 0x1B0FF]  # Kana Supplement
+    #   [0x1B100, 0x1B12F]  # Kana Extended-A
+    #   [0x1B130, 0x1B16F]  # Small Kana Extension
+    #
+    # ── Standalone ranges ────────────────────────────────────────────
+    #   [0x1100, 0x115F]    # Hangul Jamo initial consonants
+    #   [0xA960, 0xA97C]    # Hangul Jamo Extended-A
+    #   [0xAC00, 0xD7AF]    # Hangul Syllables
+    #   [0xFE10, 0xFE19]    # Vertical Forms
+    #   [0xFE30, 0xFE6B]    # CJK Compatibility Forms + Small Form Variants
+    #   [0xFF01, 0xFF60]    # Fullwidth ASCII and punctuation
+    #   [0xFFE0, 0xFFE6]    # Fullwidth special symbols
+    #   [0x1D300, 0x1D35F]  # Tai Xuan Jing Symbols
+    #   [0x1F200, 0x1F2FF]  # Enclosed Ideographic Supplement
+    #   [0x20000, 0x2EE5F]  # CJK Unified Ideographs Extensions B, C, D, E, F, I
+    #   [0x2F800, 0x2FA1F]  # CJK Compatibility Ideographs Supplement
+    #   [0x30000, 0x3347F]  # CJK Unified Ideographs Extensions G, H, J
+    #
+    # ── Excluded (EAW is neither W nor F) ────────────────────────────
+    #   [0x1160, 0x11FF]    # Hangul Jamo medial/final consonants — EAW=N
+    #   [0x2600, 0x26FF]    # Miscellaneous Symbols — mostly EAW=N
+    #   [0xD7B0, 0xD7FF]    # Hangul Jamo Extended-B — EAW=N
+    #   [0x1F000, 0x1F02F]  # Mahjong Tiles — only U+1F004 is W
+    #   [0x1FA00, 0x1FA6F]  # Chess Symbols — EAW=N
+
+    # Fast path: ASCII and Latin/Greek/Cyrillic/Arabic etc.
+    if codepoint < 0x1100:
+        return False
+    # Fast path: above all known wide ranges.
+    if codepoint > 0x3347F:
+        return False
+
+    # ── BMP: U+0000 – U+FFFF ────────────────────────────────────────
+    if codepoint <= 0xFFFF:
+        # Merged CJK range (17 adjacent blocks).
+        if codepoint >= 0x2E80 and codepoint <= 0x9FFF:
+            return True
+        # Hangul Syllables.
+        if codepoint >= 0xAC00 and codepoint <= 0xD7AF:
+            return True
+        # Private Use Area + CJK Compatibility Ideographs.
+        if codepoint >= 0xE000 and codepoint <= 0xFAFF:
+            return True
+        # Fullwidth ASCII and punctuation.
+        if codepoint >= 0xFF01 and codepoint <= 0xFF60:
+            return True
+        # Hangul Jamo Extended-A.
+        if codepoint >= 0xA960 and codepoint <= 0xA97C:
+            return True
+        # Fullwidth special symbols.
+        if codepoint >= 0xFFE0 and codepoint <= 0xFFE6:
+            return True
+        # CJK Compatibility Forms + Small Form Variants.
+        if codepoint >= 0xFE30 and codepoint <= 0xFE6B:
+            return True
+        # Vertical Forms.
+        if codepoint >= 0xFE10 and codepoint <= 0xFE19:
+            return True
+        # Hangul Jamo initial consonants (>= 0x1100 guaranteed by early exit).
+        return codepoint <= 0x115F
+
+    # ── SMP/SIP/TIP: U+10000+ ───────────────────────────────────────
+    # CJK Unified Ideographs Extensions B, C, D, E, F, I.
+    if codepoint >= 0x20000 and codepoint <= 0x2EE5F:
+        return True
+    # CJK Unified Ideographs Extensions G, H, J (<= 0x3347F by early exit).
+    if codepoint >= 0x30000:
+        return True
+    # Ideographic symbols + Tangut + Tangut Components + Khitan + Tangut Sup.
+    if codepoint >= 0x16FE0 and codepoint <= 0x18D7F:
+        return True
+    # Kana Supplement + Kana Extended-A + Small Kana Extension.
+    if codepoint >= 0x1B000 and codepoint <= 0x1B16F:
+        return True
+    # CJK Compatibility Ideographs Supplement.
+    if codepoint >= 0x2F800 and codepoint <= 0x2FA1F:
+        return True
+    # Enclosed Ideographic Supplement.
+    if codepoint >= 0x1F200 and codepoint <= 0x1F2FF:
+        return True
+    # Tai Xuan Jing Symbols.
+    if codepoint >= 0x1D300 and codepoint <= 0x1D35F:
+        return True
+    return False
+
+
+fn _is_zero_width_codepoint(codepoint: Int) -> Bool:
+    """Returns True if the codepoint occupies no terminal column.
+
+    Covers a selection of combining marks and invisible format
+    characters that attach to the preceding character.
+
+    Args:
+        codepoint: The Unicode codepoint to classify.
+
+    Returns:
+        True if the codepoint falls in one of the zero-width ranges.
+    """
+    # Fast path: nothing below U+0300 is treated as zero-width here.
+    if codepoint < 0x0300:
+        return False
+    # Combining Diacritical Marks.
+    if codepoint <= 0x036F:
+        return True
+    # Zero-width space, non-joiner, joiner, and directional marks.
+    if codepoint >= 0x200B and codepoint <= 0x200F:
+        return True
+    # Combining Diacritical Marks for Symbols.
+    if codepoint >= 0x20D0 and codepoint <= 0x20F0:
+        return True
+    # Variation Selectors.
+    if codepoint >= 0xFE00 and codepoint <= 0xFE0F:
+        return True
+    # Combining Half Marks.
+    return codepoint >= 0xFE20 and codepoint <= 0xFE2F
+
+
+fn _display_width(s: String) -> Int:
+    """Returns the terminal display width of a string.
+
+    CJK characters and fullwidth forms count as 2 columns each. ANSI
+    escape sequences (e.g. colour codes) and zero-width codepoints
+    (combining marks, joiners, variation selectors) contribute 0. All
+    other characters count as 1. This function is used by the help
+    formatter to align columns correctly with mixed CJK/ASCII text.
+
+    Args:
+        s: The string to measure.
+
+    Returns:
+        The number of terminal columns the string would occupy.
+    """
+    var width = 0
+    var i = 0
+    var bytes = s.as_bytes()
+    var n = len(bytes)
+    while i < n:
+        var b0 = Int(bytes[i])
+        # Skip ANSI escape sequences: ESC [ ... final_byte.
+        if b0 == 0x1B and i + 1 < n and Int(bytes[i + 1]) == 0x5B:
+            i += 2  # skip ESC [
+            while i < n:
+                var c = Int(bytes[i])
+                i += 1
+                # Final byte of CSI sequence is in 0x40–0x7E range.
+                if c >= 0x40 and c <= 0x7E:
+                    break
+            continue
+        # Decode the lead byte of the next UTF-8 sequence.
+        var codepoint: Int
+        var seq_len: Int
+        if b0 < 0x80:
+            codepoint = b0
+            seq_len = 1
+        elif b0 < 0xC0:
+            # Continuation byte (shouldn't start a sequence); skip.
+            i += 1
+            continue
+        elif b0 < 0xE0:
+            seq_len = 2
+            codepoint = b0 & 0x1F
+        elif b0 < 0xF0:
+            seq_len = 3
+            codepoint = b0 & 0x0F
+        else:
+            seq_len = 4
+            codepoint = b0 & 0x07
+        # Fold in continuation bytes. Stop at the first byte that is not
+        # a continuation byte so a truncated or malformed sequence does
+        # not swallow the characters that follow it.
+        var consumed = 1
+        while consumed < seq_len and i + consumed < n:
+            var b = Int(bytes[i + consumed])
+            if (b & 0xC0) != 0x80:
+                break
+            codepoint = (codepoint << 6) | (b & 0x3F)
+            consumed += 1
+        i += consumed
+        if consumed < seq_len:
+            # Incomplete sequence: count it as a single column.
+            width += 1
+            continue
+        # Determine display width of this codepoint.
+        if _is_zero_width_codepoint(codepoint):
+            continue
+        if _is_wide_codepoint(codepoint):
+            width += 2
+        else:
+            width += 1
+    return width
+
+
 fn _looks_like_number(token: String) -> Bool:
     """Returns True if *token* is a negative-number literal.

diff --git a/tests/test_help.mojo b/tests/test_help.mojo
index db8b7d9..0a1d0c1 100644
--- a/tests/test_help.mojo
+++ b/tests/test_help.mojo
@@ -3,6 +3,7 @@ from testing import assert_true, assert_false, assert_equal, TestSuite

 import argmojo
 from argmojo import Argument, Command, ParseResult
+from argmojo.utils import _display_width


 # ── Hidden arguments ──────────────────────────────────────────────────────────────
@@ -859,5 +860,127 @@ fn test_no_color_env_static_method() raises:
     print(" ✓ test_no_color_env_static_method (NO_COLOR is not set)")


+# ── CJK-aware help alignment ────────────────────────────────────────────────
+
+
+fn test_cjk_options_aligned() raises:
+    """Tests that CJK help text doesn't break column alignment."""
+    var command = Command("test", "測試應用")
+    command.add_argument(
+        Argument("verbose", help="顯示詳細資訊").long("verbose").short("v").flag()
+    )
+    command.add_argument(
+        Argument("output", help="輸出路徑").long("output").short("o")
+    )
+
+    var help = command._generate_help(color=False)
+    # Both help descriptions should start at the same display column.
+    var col_verbose: Int = -1
+    var col_output: Int = -1
+    var lines = help.splitlines()
+    for idx in range(len(lines)):
+        # Require the description on the same line as the option name so
+        # that find() cannot return -1 when the prefix is sliced.
+        if "--verbose" in lines[idx] and "顯示詳細資訊" in lines[idx]:
+            var bp = lines[idx].find("顯示詳細資訊")
+            col_verbose = _display_width(String(lines[idx][0:bp]))
+        if "--output" in lines[idx] and "輸出路徑" in lines[idx]:
+            var bp = lines[idx].find("輸出路徑")
+            col_output = _display_width(String(lines[idx][0:bp]))
+    assert_true(col_verbose > 0, msg="verbose help should appear")
+    assert_true(col_output > 0, msg="output help should appear")
+    assert_equal(
+        col_verbose,
+        col_output,
+        msg="CJK help descriptions should be aligned at the same column",
+    )
+
+
+fn test_cjk_subcommands_aligned() raises:
+    """Tests that CJK subcommand descriptions align correctly."""
+    var app = Command("工具", "一個命令行工具")
+    var init = Command("初始化", "建立新項目")
+    app.add_subcommand(init^)
+    var build = Command("構建", "編譯項目")
+    app.add_subcommand(build^)
+
+    var help = app._generate_help(color=False)
+    var col_init: Int = -1
+    var col_build: Int = -1
+    var lines = help.splitlines()
+    for idx in range(len(lines)):
+        # bp is the offset reported by find(); measure the prefix in columns.
+        if "初始化" in lines[idx] and "建立新項目" in lines[idx]:
+            var bp = lines[idx].find("建立新項目")
+            col_init = _display_width(String(lines[idx][0:bp]))
+        if "構建" in lines[idx] and "編譯項目" in lines[idx]:
+            var bp = lines[idx].find("編譯項目")
+            col_build = _display_width(String(lines[idx][0:bp]))
+    assert_true(col_init > 0, msg="init description should appear")
+    assert_true(col_build > 0, msg="build description should appear")
+    assert_equal(
+        col_init,
+        col_build,
+        msg="CJK subcommand descriptions should be aligned",
+    )
+
+
+fn test_cjk_positionals_aligned() raises:
+    """Tests that CJK positional argument help aligns correctly."""
+    var command = Command("test", "測試")
+    command.add_argument(Argument("檔案", help="輸入檔案路徑"))
+    command.add_argument(Argument("目標", help="輸出目標位置"))
+
+    var help = command._generate_help(color=False)
+    var col_file: Int = -1
+    var col_target: Int = -1
+    var lines = help.splitlines()
+    for idx in range(len(lines)):
+        # bp is the offset reported by find(); measure the prefix in columns.
+        if "檔案" in lines[idx] and "輸入檔案路徑" in lines[idx]:
+            var bp = lines[idx].find("輸入檔案路徑")
+            col_file = _display_width(String(lines[idx][0:bp]))
+        if "目標" in lines[idx] and "輸出目標位置" in lines[idx]:
+            var bp = lines[idx].find("輸出目標位置")
+            col_target = _display_width(String(lines[idx][0:bp]))
+    assert_true(col_file > 0, msg="file help should appear")
+    assert_true(col_target > 0, msg="target help should appear")
+    assert_equal(
+        col_file,
+        col_target,
+        msg="CJK positional descriptions should be aligned",
+    )
+
+
+fn test_mixed_ascii_cjk_aligned() raises:
+    """Tests alignment when mixing ASCII and CJK option names."""
+    var command = Command("test", "Test app")
+    command.add_argument(
+        Argument("output", help="Output path").long("output").short("o")
+    )
+    command.add_argument(Argument("編碼", help="設定編碼").long("編碼"))
+
+    var help = command._generate_help(color=False)
+    var col_output: Int = -1
+    var col_enc: Int = -1
+    var lines = help.splitlines()
+    for idx in range(len(lines)):
+        # Require the description on the same line as the option name so
+        # that find() cannot return -1 when the prefix is sliced.
+        if "--output" in lines[idx] and "Output path" in lines[idx]:
+            var bp = lines[idx].find("Output path")
+            col_output = _display_width(String(lines[idx][0:bp]))
+        if "--編碼" in lines[idx] and "設定編碼" in lines[idx]:
+            var bp = lines[idx].find("設定編碼")
+            col_enc = _display_width(String(lines[idx][0:bp]))
+    assert_true(col_output > 0, msg="output help should appear")
+    assert_true(col_enc > 0, msg="encoding help should appear")
+    assert_equal(
+        col_output,
+        col_enc,
+        msg="Mixed ASCII/CJK option help should be aligned",
+    )
+
+
 fn main() raises:
     TestSuite.discover_tests[__functions_in_module()]().run()