1- """check-language command — scan Markdown artifacts for disallowed Unicode scripts."""
1+ """check-language command — scan Markdown artifacts for disallowed Unicode scripts.
22
3+ @cpt-algo:cpt-cypilot-flow-traceability-validation-check-language:p1
4+ """
5+ # @cpt-begin:cpt-cypilot-flow-traceability-validation-check-language:p1:inst-check-lang-imports
36import argparse
47from pathlib import Path
58from typing import List
69
710from ..utils import error_codes as EC
811from ..utils .ui import ui
12+ # @cpt-end:cpt-cypilot-flow-traceability-validation-check-language:p1:inst-check-lang-imports
913
1014
15+ # @cpt-begin:cpt-cypilot-flow-traceability-validation-check-language:p1:inst-cmd-check-language
1116def cmd_check_language (argv : List [str ]) -> int :
1217 """Scan Markdown files for characters outside the allowed language set.
1318
@@ -37,28 +42,25 @@ def cmd_check_language(argv: List[str]) -> int:
3742 help = "Comma-separated language codes to allow, e.g. 'en' or 'en,ru'. "
3843 "Overrides workspace config." ,
3944 )
40- p .add_argument (
41- "--exclude" ,
42- action = "append" ,
43- default = [],
44- metavar = "GLOB" ,
45- dest = "exclude" ,
46- help = (
47- "Glob pattern for paths to skip (relative to each scan root). "
48- "Repeatable: --exclude 'translations/**' --exclude 'specs/i18n/*.md'. "
49- "Merged with check_language_ignore_paths from workspace config."
50- ),
51- )
5245 p .add_argument (
5346 "--quiet" ,
5447 "-q" ,
5548 action = "store_true" ,
5649 help = "Suppress summary header; show violations only." ,
5750 )
51+ p .add_argument (
52+ "--ignore" ,
53+ action = "append" ,
54+ default = [],
55+ metavar = "PATTERN" ,
56+ help = "Glob pattern of files to skip (e.g. 'translations/**/*.md'). "
57+ "Can be repeated. Also reads ignore_paths from workspace config." ,
58+ )
5859 args = p .parse_args (argv )
5960
6061 from ..utils .content_language import (
6162 SUPPORTED_LANGUAGES ,
63+ LangScanError ,
6264 build_allowed_ranges ,
6365 scan_paths ,
6466 )
@@ -78,10 +80,19 @@ def cmd_check_language(argv: List[str]) -> int:
7880 return 1
7981 allowed_langs = raw_langs
8082 else :
81- allowed_langs = _read_config_languages ()
83+ try :
84+ allowed_langs = _read_config_languages ()
85+ except ValueError as exc :
86+ ui .result ({"status" : "ERROR" , "message" : str (exc )})
87+ return 1
8288
83- # ── Resolve ignore globs ─────────────────────────────────────────────────
84- ignore_globs : List [str ] = list (args .exclude ) + _read_config_ignore_paths ()
89+ # ── Resolve ignore patterns ──────────────────────────────────────────────
90+ ignore_patterns : List [str ] = list (args .ignore )
91+ try :
92+ ignore_patterns .extend (_read_config_ignore_patterns ())
93+ except ValueError as exc :
94+ ui .result ({"status" : "ERROR" , "message" : str (exc )})
95+ return 1
8596
8697 # ── Resolve scan roots ───────────────────────────────────────────────────
8798 if args .paths :
@@ -99,14 +110,10 @@ def cmd_check_language(argv: List[str]) -> int:
99110
100111 # ── Scan ─────────────────────────────────────────────────────────────────
101112 allowed_ranges = build_allowed_ranges (allowed_langs )
102- from ..utils .content_language import LangScanError
103113 try :
104- violations = scan_paths (roots , allowed_ranges , ignore_globs = ignore_globs or None )
114+ violations = scan_paths (roots , allowed_ranges , ignore_patterns = ignore_patterns )
105115 except LangScanError as exc :
106- ui .result ({
107- "status" : "ERROR" ,
108- "message" : str (exc ),
109- })
116+ ui .result ({"status" : "ERROR" , "message" : str (exc )})
110117 return 1
111118
112119 files_scanned = _count_md_files (roots )
@@ -118,8 +125,6 @@ def cmd_check_language(argv: List[str]) -> int:
118125 "files_scanned" : files_scanned ,
119126 "violation_count" : 0 ,
120127 }
121- if ignore_globs :
122- result ["ignore_globs" ] = ignore_globs
123128 ui .result (result , human_fn = lambda d : _human_result (d , quiet = args .quiet ))
124129 return 0
125130
@@ -147,51 +152,58 @@ def cmd_check_language(argv: List[str]) -> int:
147152 "file_count" : len (by_file ),
148153 "violations" : violation_items ,
149154 }
150- if ignore_globs :
151- result ["ignore_globs" ] = ignore_globs
152155 ui .result (result , human_fn = lambda d : _human_result (d , quiet = args .quiet ))
153156 return 2
154157
158+ # @cpt-end:cpt-cypilot-flow-traceability-validation-check-language:p1:inst-cmd-check-language
159+
155160
156161# ---------------------------------------------------------------------------
157162# Helpers
158163# ---------------------------------------------------------------------------
164+ # @cpt-begin:cpt-cypilot-flow-traceability-validation-check-language:p1:inst-helpers
159165
def _read_config_languages() -> List[str]:
    """Return the allowed content languages configured for the workspace.

    Reads ``validation.allowed_content_languages`` from the workspace config
    located via the current context's project root.

    Returns:
        A new list of language codes (never the config's own list object).
        Falls back to ``["en"]`` when there is no active context, no
        workspace config, no ``validation`` section, or the setting is empty.

    Raises:
        ValueError: If ``find_workspace_config`` reports an error for the
            workspace config.
    """
    # Resolved at call time rather than at module import.
    from ..utils.context import get_context
    from ..utils.workspace import find_workspace_config

    ctx = get_context()
    if ctx is None:
        return ["en"]

    ws_cfg, ws_err = find_workspace_config(ctx.project_root)
    if ws_err:
        raise ValueError(f"Workspace config error: {ws_err}")

    validation = None if ws_cfg is None else ws_cfg.validation
    if validation is None:
        return ["en"]

    langs = validation.allowed_content_languages
    if not langs:
        return ["en"]
    if isinstance(langs, str):
        # A lone string would otherwise be iterated character by character
        # downstream; treat it as a single language code.
        # NOTE(review): the config schema may already reject this - confirm.
        return [langs]
    # Copy so callers cannot mutate the configuration object's list.
    return list(langs)
177185
178186
def _read_config_ignore_patterns() -> List[str]:
    """Return the ignore glob patterns configured for the workspace.

    Reads ``validation.ignore_paths`` from the workspace config located via
    the current context's project root.

    Returns:
        A new list of glob patterns. Empty when there is no active context,
        no workspace config, no ``validation`` section, or no ``ignore_paths``
        setting.

    Raises:
        ValueError: If ``find_workspace_config`` reports an error for the
            workspace config.
    """
    # Resolved at call time rather than at module import.
    from ..utils.context import get_context
    from ..utils.workspace import find_workspace_config

    ctx = get_context()
    if ctx is None:
        return []

    ws_cfg, ws_err = find_workspace_config(ctx.project_root)
    if ws_err:
        raise ValueError(f"Workspace config error: {ws_err}")

    validation = None if ws_cfg is None else ws_cfg.validation
    if validation is None:
        return []

    # getattr: the validation section may not define ignore_paths at all.
    patterns = getattr(validation, "ignore_paths", None)
    if not patterns:
        return []
    if isinstance(patterns, str):
        # list("a/**") would split into single characters, including a bare
        # "*" pattern; keep a lone string as one pattern instead.
        # NOTE(review): the config schema may already reject this - confirm.
        return [patterns]
    return list(patterns)
196208
197209
@@ -203,7 +215,7 @@ def _default_roots() -> List[Path]:
203215 ctx = get_context ()
204216 if ctx is not None :
205217 return [ctx .project_root / "architecture" ]
206- except (ImportError , AttributeError ):
218+ except (ImportError , AttributeError , RuntimeError ):
207219 pass
208220 return [Path .cwd () / "architecture" ]
209221
@@ -218,10 +230,13 @@ def _count_md_files(roots: List[Path]) -> int:
218230 count += sum (1 for _ in root .rglob ("*.md" ))
219231 return count
220232
233+ # @cpt-end:cpt-cypilot-flow-traceability-validation-check-language:p1:inst-helpers
234+
221235
222236# ---------------------------------------------------------------------------
223237# Human formatter
224238# ---------------------------------------------------------------------------
239+ # @cpt-begin:cpt-cypilot-flow-traceability-validation-check-language:p1:inst-human-result
225240
226241def _human_result (data : dict , quiet : bool = False ) -> None :
227242 status = data .get ("status" , "" )
@@ -266,12 +281,6 @@ def _human_result(data: dict, quiet: bool = False) -> None:
266281 " [validation]\n "
267282 " allowed_content_languages = [\" en\" , \" ru\" ]"
268283 )
269- ui .hint (
270- "To ignore specific paths (e.g. translation specs), use --exclude or add to config:\n "
271- " [validation]\n "
272- " check_language_ignore_paths = [\" translations/**\" , \" specs/i18n/*.md\" ]\n "
273- "To ignore a single file, add <!-- cpt-lang: ignore --> anywhere in the file."
274- )
275- if data .get ("ignore_globs" ):
276- ui .detail ("Active ignore globs" , ", " .join (data ["ignore_globs" ]))
277284 ui .blank ()
285+
286+ # @cpt-end:cpt-cypilot-flow-traceability-validation-check-language:p1:inst-human-result
0 commit comments