Skip to content

Commit 551edc8

Browse files
authored
Merge pull request #133 from teslanika/feat/check-language-validator
feat: add content language validation for markdown artifacts
2 parents 0f3f2ba + 1349380 commit 551edc8

22 files changed

Lines changed: 2472 additions & 3 deletions

File tree

.bootstrap/.core/skills/cypilot/scripts/cypilot/cli.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ def _cmd_doctor(argv: List[str]) -> int:
154154
def _cmd_delegate(argv: List[str]) -> int:
155155
from .commands.delegate import cmd_delegate
156156
return cmd_delegate(argv)
157+
158+
def _cmd_check_language(argv: List[str]) -> int:
    """Lazily import and dispatch to the `check-language` subcommand."""
    from .commands import check_language

    return check_language.cmd_check_language(argv)
157161
# @cpt-end:cpt-cypilot-algo-core-infra-route-command:p1:inst-route-helpers
158162

159163
# =============================================================================
@@ -180,7 +184,7 @@ def main(argv: Optional[List[str]] = None) -> int:
180184
# Context may be None if Cypilot not initialized - that's OK for some commands like init
181185

182186
# Define all available commands
183-
analysis_commands = ["validate", "validate-kits", "validate-toc", "spec-coverage"]
187+
analysis_commands = ["validate", "validate-kits", "validate-toc", "spec-coverage", "check-language"]
184188
legacy_aliases = ["validate-code", "validate-rules"]
185189
kit_commands = ["kit"]
186190
utility_commands = ["toc", "chunk-input"]
@@ -359,6 +363,8 @@ def main(argv: Optional[List[str]] = None) -> int:
359363
return _cmd_delegate(rest)
360364
elif cmd == "doctor":
361365
return _cmd_doctor(rest)
366+
elif cmd == "check-language":
367+
return _cmd_check_language(rest)
362368
else:
363369
# @cpt-begin:cpt-cypilot-algo-core-infra-route-command:p1:inst-if-no-handler
364370
# @cpt-begin:cpt-cypilot-algo-core-infra-route-command:p1:inst-return-unknown
Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
"""check-language command — scan Markdown artifacts for disallowed Unicode scripts."""
2+
3+
import argparse
4+
from pathlib import Path
5+
from typing import List
6+
7+
from ..utils import error_codes as EC
8+
from ..utils.ui import ui
9+
10+
11+
def cmd_check_language(argv: List[str]) -> int:
    """Scan Markdown files for characters outside the allowed language set.

    Language policy comes from ``--languages`` when given, otherwise from the
    workspace config (``[validation] allowed_content_languages``), defaulting
    to English. Scan roots default to the project's architecture/ folder.

    Exit codes:
      0 — all files pass
      1 — configuration / path error
      2 — one or more language violations found
    """
    p = argparse.ArgumentParser(
        prog="check-language",
        description=(
            "Scan Markdown artifacts for characters outside the allowed Unicode "
            "script set. Language policy is read from workspace config "
            "([validation] allowed_content_languages) or set via --languages."
        ),
    )
    p.add_argument(
        "paths",
        nargs="*",
        metavar="path",
        help="Files or directories to scan (default: project architecture/ folder)",
    )
    p.add_argument(
        "--languages",
        default=None,
        metavar="CODES",
        help="Comma-separated language codes to allow, e.g. 'en' or 'en,ru'. "
        "Overrides workspace config.",
    )
    p.add_argument(
        "--exclude",
        action="append",
        default=[],
        metavar="GLOB",
        dest="exclude",
        help=(
            "Glob pattern for paths to skip (relative to each scan root). "
            "Repeatable: --exclude 'translations/**' --exclude 'specs/i18n/*.md'. "
            "Merged with check_language_ignore_paths from workspace config."
        ),
    )
    p.add_argument(
        "--quiet",
        "-q",
        action="store_true",
        help="Suppress summary header; show violations only.",
    )
    args = p.parse_args(argv)

    # Deferred so plain `cypilot --help` does not pay the import cost.
    # LangScanError is pulled in here as well — previously it was fetched via
    # a second, later import of the same module.
    from ..utils.content_language import (
        LangScanError,
        SUPPORTED_LANGUAGES,
        build_allowed_ranges,
        scan_paths,
    )

    # ── Resolve allowed languages ────────────────────────────────────────────
    if args.languages is not None:
        raw_langs = [c.strip().lower() for c in args.languages.split(",") if c.strip()]
        unknown = [c for c in raw_langs if c not in SUPPORTED_LANGUAGES]
        if unknown:
            ui.result({
                "status": "ERROR",
                "message": (
                    f"Unknown language code(s): {', '.join(unknown)}. "
                    f"Supported: {', '.join(SUPPORTED_LANGUAGES)}"
                ),
            })
            return 1
        allowed_langs = raw_langs
    else:
        allowed_langs = _read_config_languages()

    # ── Resolve ignore globs ─────────────────────────────────────────────────
    # CLI --exclude patterns are merged with config-level ignore paths.
    ignore_globs: List[str] = list(args.exclude) + _read_config_ignore_paths()

    # ── Resolve scan roots ───────────────────────────────────────────────────
    roots = [Path(pth) for pth in args.paths] if args.paths else _default_roots()

    missing = [str(r) for r in roots if not r.exists()]
    if missing:
        ui.result({
            "status": "ERROR",
            "message": f"Path(s) not found: {', '.join(missing)}",
        })
        return 1

    # ── Scan ─────────────────────────────────────────────────────────────────
    allowed_ranges = build_allowed_ranges(allowed_langs)
    try:
        violations = scan_paths(roots, allowed_ranges, ignore_globs=ignore_globs or None)
    except LangScanError as exc:
        ui.result({
            "status": "ERROR",
            "message": str(exc),
        })
        return 1

    files_scanned = _count_md_files(roots)

    if not violations:
        result = {
            "status": "PASS",
            "allowed_languages": allowed_langs,
            "files_scanned": files_scanned,
            "violation_count": 0,
        }
        if ignore_globs:
            result["ignore_globs"] = ignore_globs
        ui.result(result, human_fn=lambda d: _human_result(d, quiet=args.quiet))
        return 0

    # Group violations by file: file_count reports distinct files, and the
    # item list is emitted file-by-file for stable, readable output.
    by_file: dict = {}
    for v in violations:
        by_file.setdefault(str(v.path), []).append(v)

    violation_items = [
        {
            "path": file_path,
            "line": v.lineno,
            "chars": v.bad_chars_preview(),
            "preview": v.line_preview(),
            "code": EC.CONTENT_LANGUAGE_VIOLATION,
        }
        for file_path, file_violations in by_file.items()
        for v in file_violations
    ]

    result = {
        "status": "FAIL",
        "allowed_languages": allowed_langs,
        "files_scanned": files_scanned,
        "violation_count": len(violations),
        "file_count": len(by_file),
        "violations": violation_items,
    }
    if ignore_globs:
        result["ignore_globs"] = ignore_globs
    ui.result(result, human_fn=lambda d: _human_result(d, quiet=args.quiet))
    return 2
154+
155+
156+
# ---------------------------------------------------------------------------
157+
# Helpers
158+
# ---------------------------------------------------------------------------
159+
160+
def _read_config_languages() -> List[str]:
161+
"""Read allowed_content_languages from workspace config; fall back to ['en']."""
162+
try:
163+
from ..utils.context import get_context
164+
from ..utils.workspace import find_workspace_config
165+
166+
ctx = get_context()
167+
if ctx is None:
168+
return ["en"]
169+
_ws_cfg, _ = find_workspace_config(ctx.project_root)
170+
if _ws_cfg is not None and _ws_cfg.validation is not None: # type: ignore[union-attr]
171+
langs = _ws_cfg.validation.allowed_content_languages # type: ignore[union-attr]
172+
if langs:
173+
return langs
174+
except Exception:
175+
pass
176+
return ["en"]
177+
178+
179+
def _read_config_ignore_paths() -> List[str]:
180+
"""Read check_language_ignore_paths from workspace config; fall back to []."""
181+
try:
182+
from ..utils.context import get_context
183+
from ..utils.workspace import find_workspace_config
184+
185+
ctx = get_context()
186+
if ctx is None:
187+
return []
188+
_ws_cfg, _ = find_workspace_config(ctx.project_root)
189+
if _ws_cfg is not None and _ws_cfg.validation is not None: # type: ignore[union-attr]
190+
paths = _ws_cfg.validation.check_language_ignore_paths # type: ignore[union-attr]
191+
if paths:
192+
return list(paths)
193+
except Exception:
194+
pass
195+
return []
196+
197+
198+
def _default_roots() -> List[Path]:
199+
"""Return the default scan root (architecture/ under project root)."""
200+
try:
201+
from ..utils.context import get_context
202+
203+
ctx = get_context()
204+
if ctx is not None:
205+
return [ctx.project_root / "architecture"]
206+
except (ImportError, AttributeError):
207+
pass
208+
return [Path.cwd() / "architecture"]
209+
210+
211+
def _count_md_files(roots: List[Path]) -> int:
212+
count = 0
213+
for root in roots:
214+
if root.is_file():
215+
if root.suffix.lower() == ".md":
216+
count += 1
217+
elif root.is_dir():
218+
count += sum(1 for _ in root.rglob("*.md"))
219+
return count
220+
221+
222+
# ---------------------------------------------------------------------------
223+
# Human formatter
224+
# ---------------------------------------------------------------------------
225+
226+
def _human_result(data: dict, quiet: bool = False) -> None:
    """Pretty-print a check-language result dict (PASS / ERROR / FAIL shapes).

    ``quiet`` suppresses the summary header; violations are always shown.
    """
    status = data.get("status", "")
    allowed = data.get("allowed_languages", [])

    if not quiet:
        ui.header("check-language")
        ui.detail("Allowed languages", ", ".join(allowed))
        ui.detail("Files scanned", str(data.get("files_scanned", 0)))
        ui.blank()

    if status == "PASS":
        ui.success("No language violations found.")
        ui.blank()
        return

    if status == "ERROR":
        ui.error(str(data.get("message", "Unknown error")))
        ui.blank()
        return

    # FAIL: headline count, then per-file sections.
    n_viol = data.get("violation_count", 0)
    n_files = data.get("file_count", 0)
    ui.warn(f"FAIL {n_viol} violation(s) in {n_files} file(s)")
    ui.blank()

    # Re-group the flat violation list by file so each file prints once.
    grouped: dict = {}
    for item in data.get("violations", []):
        grouped.setdefault(item["path"], []).append(item)

    for file_path, items in grouped.items():
        ui.substep(f" {ui.relpath(file_path)} ({len(items)} line(s))")
        for item in items:
            ui.substep(f" line {item['line']:>4} [{item['chars']}] {item['preview']}")
        ui.blank()

    ui.hint("Fix: rewrite flagged content in the allowed language(s).")
    ui.hint(
        "To allow additional scripts, add to .cypilot-workspace.toml:\n"
        " [validation]\n"
        " allowed_content_languages = [\"en\", \"ru\"]"
    )
    ui.hint(
        "To ignore specific paths (e.g. translation specs), use --exclude or add to config:\n"
        " [validation]\n"
        " check_language_ignore_paths = [\"translations/**\", \"specs/i18n/*.md\"]\n"
        "To ignore a single file, add <!-- cpt-lang: ignore --> anywhere in the file."
    )
    if data.get("ignore_globs"):
        ui.detail("Active ignore globs", ", ".join(data["ignore_globs"]))
    ui.blank()

.bootstrap/.core/skills/cypilot/scripts/cypilot/commands/validate.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,15 @@ def _attach_issue_to_artifact_report(issue: Dict[str, object], *, is_error: bool
390390
rep["warnings"].append(issue)
391391
# @cpt-end:cpt-cypilot-flow-traceability-validation-validate:p1:inst-validate-helpers
392392

393+
# Content language check — runs after per-artifact structure validation.
394+
# Skipped if structure has already failed (all_errors non-empty) so language
395+
# issues never obscure structural errors.
396+
if not all_errors:
397+
_lang_errs = _run_content_language_check(artifacts_to_validate, ws_ctx, project_root)
398+
for _le in _lang_errs:
399+
all_errors.append(_le)
400+
_attach_issue_to_artifact_report(_le, is_error=True)
401+
393402
# @cpt-begin:cpt-cypilot-flow-traceability-validation-validate:p1:inst-if-structure-fail
394403
# Stop early: cross-artifact reference checks and code traceability checks are run only
395404
# after per-artifact structure/content checks pass.
@@ -897,6 +906,74 @@ def _suggest_path_from_autodetect(node: object, target_kind: str) -> Optional[st
897906
return None
898907
# @cpt-end:cpt-cypilot-flow-traceability-validation-validate:p1:inst-validate-helpers
899908

909+
# ---------------------------------------------------------------------------
910+
# Content language check helper
911+
# ---------------------------------------------------------------------------
912+
913+
def _run_content_language_check(
    artifacts_to_validate: list,
    ws_ctx: object,
    project_root: "Path",
) -> list:
    """Return language-violation error dicts for all validated .md artifacts.

    The project root is taken from ``ws_ctx.project_root`` when present
    (workspace mode), otherwise from ``project_root`` (single-repo mode).
    An unset ``allowed_content_languages`` disables the check entirely and
    yields an empty list; a workspace config that exists but cannot be loaded
    yields a single error entry instead.
    """
    from ..utils.workspace import find_workspace_config as _find_ws
    from ..utils.content_language import (
        LangScanError as _LangScanError,
        build_allowed_ranges,
        scan_file as _scan_file,
    )
    from ..utils.constraints import error as _error
    from ..utils import error_codes as _EC

    # Prefer the workspace context's root; fall back to the caller's root.
    root = getattr(ws_ctx, "project_root", None) or project_root

    cfg, load_err = _find_ws(root)
    if load_err is not None:
        return [_error(
            "workspace",
            f"Failed to load workspace config for language check: {load_err}",
            path=root,
            code=_EC.FILE_LOAD_ERROR,
        )]
    if cfg is None or cfg.validation is None:
        return []
    languages = cfg.validation.allowed_content_languages
    if not languages:
        return []

    ranges = build_allowed_ranges(languages)
    errors: list = []
    for artifact_path, _tpl, _atype, _trace, _kit in artifacts_to_validate:
        # Only Markdown artifacts are subject to the language policy.
        if artifact_path.suffix.lower() != ".md":
            continue
        try:
            found = _scan_file(artifact_path, ranges)
        except _LangScanError as exc:
            errors.append(_error(
                "language",
                f"Cannot read file for language scan: {exc}",
                path=artifact_path,
                code=_EC.FILE_READ_ERROR,
            ))
            continue
        for violation in found:
            errors.append(_error(
                "language",
                f"Non-allowed characters [{violation.bad_chars_preview()}] — {violation.line_preview()}",
                path=artifact_path,
                line=violation.lineno,
                code=_EC.CONTENT_LANGUAGE_VIOLATION,
                allowed_languages=languages,
            ))
    return errors
975+
976+
900977
# ---------------------------------------------------------------------------
901978
# Human-friendly formatter
902979
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)