Skip to content
Merged
1 change: 1 addition & 0 deletions plugin/plugins/_shared/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Shared plugin helpers that are not owned by a single user plugin."""
25 changes: 25 additions & 0 deletions plugin/plugins/_shared/rapidocr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Shared RapidOCR runtime, model download, and OCR backend helpers."""

from .ocr_backends import RapidOcrBackend
from .rapidocr_support import (
DEFAULT_RAPIDOCR_ENGINE_TYPE,
DEFAULT_RAPIDOCR_LANG_TYPE,
DEFAULT_RAPIDOCR_MODEL_TYPE,
DEFAULT_RAPIDOCR_OCR_VERSION,
RAPIDOCR_PACKAGE_NAME,
download_rapidocr_models,
inspect_rapidocr_installation,
load_rapidocr_runtime,
)

# Explicit public API of this package: every name listed here is imported
# above from `.ocr_backends` or `.rapidocr_support` and re-exported as-is.
__all__ = [
"DEFAULT_RAPIDOCR_ENGINE_TYPE",
"DEFAULT_RAPIDOCR_LANG_TYPE",
"DEFAULT_RAPIDOCR_MODEL_TYPE",
"DEFAULT_RAPIDOCR_OCR_VERSION",
"RAPIDOCR_PACKAGE_NAME",
"RapidOcrBackend",
"download_rapidocr_models",
"inspect_rapidocr_installation",
"load_rapidocr_runtime",
]
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

import httpx

from .memory_reader import is_windows_platform

from ._model_registry import (
DEFAULT_RAPIDOCR_ENGINE_TYPE,
DEFAULT_RAPIDOCR_LANG_TYPE,
Expand All @@ -25,6 +23,7 @@
)
from ._paths import (
_rapidocr_install_state_path,
is_windows_platform,
resolve_rapidocr_install_target,
resolve_rapidocr_model_cache_dir,
resolve_rapidocr_runtime_dir,
Expand All @@ -40,16 +39,35 @@ def inspect_rapidocr_installation(
lang_type: str = DEFAULT_RAPIDOCR_LANG_TYPE,
model_type: str = DEFAULT_RAPIDOCR_MODEL_TYPE,
ocr_version: str = DEFAULT_RAPIDOCR_OCR_VERSION,
plugin_id: str,
platform_fn: Callable[[], bool] | None = None,
) -> dict[str, Any]:
checker = platform_fn or is_windows_platform
supported = bool(checker())
target_dir = resolve_rapidocr_install_target(install_target_dir_raw)
runtime_dir = resolve_rapidocr_runtime_dir(install_target_dir_raw)
site_packages_dir = resolve_rapidocr_site_packages_dir(install_target_dir_raw)
model_cache_dir = resolve_rapidocr_model_cache_dir(install_target_dir_raw)
package_dir = _runtime._rapidocr_package_dir(install_target_dir_raw)
install_state_path = _rapidocr_install_state_path(install_target_dir_raw)
target_dir = resolve_rapidocr_install_target(
install_target_dir_raw,
plugin_id=plugin_id,
)
runtime_dir = resolve_rapidocr_runtime_dir(
install_target_dir_raw,
plugin_id=plugin_id,
)
site_packages_dir = resolve_rapidocr_site_packages_dir(
install_target_dir_raw,
plugin_id=plugin_id,
)
model_cache_dir = resolve_rapidocr_model_cache_dir(
install_target_dir_raw,
plugin_id=plugin_id,
)
package_dir = _runtime._rapidocr_package_dir(
install_target_dir_raw,
plugin_id=plugin_id,
)
install_state_path = _rapidocr_install_state_path(
install_target_dir_raw,
plugin_id=plugin_id,
)
selected_model = rapidocr_selected_model_name(
ocr_version=ocr_version,
lang_type=lang_type,
Expand All @@ -71,8 +89,7 @@ def inspect_rapidocr_installation(
except (OSError, ValueError, TypeError):
install_state = {}

# rapidocr-onnxruntime is now bundled into the main program (see
# pyproject.toml [dependency-groups] galgame). Treat either source as
# rapidocr-onnxruntime is now bundled into the main program. Treat either source as
# "package present": main interpreter import OR legacy plugin-isolated dir.
bundled_spec = None
try:
Expand Down Expand Up @@ -102,6 +119,7 @@ def inspect_rapidocr_installation(
ocr_version=ocr_version,
lang_type=lang_type,
model_type=model_type,
plugin_id=plugin_id,
)
if missing:
detail = "missing_model_files"
Expand All @@ -119,6 +137,7 @@ def inspect_rapidocr_installation(
lang_type=lang_type,
model_type=model_type,
ocr_version=ocr_version,
plugin_id=plugin_id,
)
detected_path = str(runtime_meta.get("detected_path") or detected_path)
detail = "installed"
Expand All @@ -133,6 +152,7 @@ def inspect_rapidocr_installation(
ocr_version=ocr_version,
lang_type=lang_type,
model_type=model_type,
plugin_id=plugin_id,
)
if legacy_missing:
detail = "missing_model_files"
Expand All @@ -146,14 +166,14 @@ def inspect_rapidocr_installation(
ocr_version=ocr_version,
lang_type=lang_type,
model_type=model_type,
plugin_id=plugin_id,
)
total_size_estimate = sum(int(f.get("size") or 0) for f in missing_files)
return {
"install_supported": supported,
"installed": installed,
# rapidocr-onnxruntime is now bundled into the main program (see
# pyproject.toml [dependency-groups] galgame). When it's not importable
# the user is on a source install without `uv sync --group galgame` —
# rapidocr-onnxruntime is now bundled into the main program. When it's not importable
# the user is on a source install without the optional OCR dependency group —
# no in-app install action exists anymore (HTTP routes removed in this
# refactor), so `can_install` stays False to keep the UI button hidden.
"can_install": False,
Expand All @@ -173,6 +193,7 @@ def inspect_rapidocr_installation(
"ocr_version": ocr_version,
"detail": detail,
"runtime_error": runtime_error,
"install_state": install_state,
"missing_model_files": missing_files,
"missing_model_total_size": total_size_estimate,
"model_download_source": _RAPIDOCR_MODELSCOPE_BASE,
Expand All @@ -182,6 +203,11 @@ def inspect_rapidocr_installation(
# ====== Model download ======

ProgressCallback = Callable[[dict[str, Any]], Awaitable[None] | None]
InstallStateUpdater = Callable[..., dict[str, object]]


def _noop_install_state_updater(*_args: Any, **_kwargs: Any) -> dict[str, object]:
return {}


async def _emit_model_progress(
Expand Down Expand Up @@ -220,9 +246,10 @@ async def download_rapidocr_models(
timeout_seconds: float = 180.0,
force: bool = False,
task_id: str | None = None,
plugin_id: str = "galgame_plugin",
plugin_id: str,
progress_callback: ProgressCallback | None = None,
before_completed_callback: Callable[[], Awaitable[None] | None] | None = None,
install_state_updater: InstallStateUpdater | None = None,
) -> dict[str, Any]:
"""Download all model files required for the (ocr_version, lang_type) selection.

Expand All @@ -233,7 +260,23 @@ async def download_rapidocr_models(
Failures preserve specific error text (HTTP status, timeout, network) so
the UI can show actionable copy.
"""
from .install_tasks import update_install_task_state # local import: avoid cycle
if task_id and install_state_updater is None:
logger.warning(
"rapidocr model download has task_id but no install state updater; progress will not persist"
)
raw_update_install_task_state = install_state_updater or _noop_install_state_updater

def update_install_task_state(*args: Any, **kwargs: Any) -> dict[str, object]:
    """Forward to the configured install-state updater without ever raising.

    All arguments are passed through unchanged. If the underlying updater
    raises, the failure is logged as a warning (with traceback) and an empty
    dict is returned so the download itself is not interrupted.
    """
    try:
        updated_state = raw_update_install_task_state(*args, **kwargs)
    except Exception:  # noqa: BLE001 - progress persistence must not break downloads.
        # The task id is taken from the first positional argument when one
        # was given, otherwise from the `task_id` keyword (empty string if absent).
        reported_task_id = args[0] if args else kwargs.get("task_id", "")
        logger.warning(
            "rapidocr model download install state update failed for task_id=%s plugin_id=%s",
            reported_task_id,
            plugin_id,
            exc_info=True,
        )
        return {}
    return updated_state

async def _before_completed() -> None:
if before_completed_callback is None:
Expand All @@ -250,7 +293,10 @@ async def _before_completed_safely() -> None:
except Exception: # noqa: BLE001
logger.warning("failed to run rapidocr_models completion callback", exc_info=True)

cache_dir = resolve_rapidocr_model_cache_dir(install_target_dir_raw)
cache_dir = resolve_rapidocr_model_cache_dir(
install_target_dir_raw,
plugin_id=plugin_id,
)
if not cache_dir:
raise RuntimeError("missing RapidOCR model cache directory")
cache_dir.mkdir(parents=True, exist_ok=True)
Expand All @@ -260,6 +306,7 @@ async def _before_completed_safely() -> None:
ocr_version=ocr_version,
lang_type=lang_type,
model_type=model_type,
plugin_id=plugin_id,
)
if not required:
await _before_completed_safely()
Expand Down Expand Up @@ -343,7 +390,7 @@ async def _before_completed_safely() -> None:
url = str(spec["url"])
headers = {
"Accept": "application/octet-stream",
"User-Agent": "N.E.K.O/galgame_plugin",
"User-Agent": f"N.E.K.O/{plugin_id}",
}
source_label = "ModelScope"
running_message = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def _resolve_rapidocr_model_paths(

Both conventions are checked per location to support either source. The
`_infer` form is preferred (matches both bundled wheels and the
test_galgame_rapidocr_support fixtures that came in with PR #1194).
existing RapidOCR support fixtures that came in with PR #1194).
"""
lang = str(lang_type or DEFAULT_RAPIDOCR_LANG_TYPE).strip() or DEFAULT_RAPIDOCR_LANG_TYPE
version = str(ocr_version or DEFAULT_RAPIDOCR_OCR_VERSION).strip() or DEFAULT_RAPIDOCR_OCR_VERSION
Expand Down Expand Up @@ -209,6 +209,16 @@ def _registry_lookup(ocr_version: str, lang_type: str) -> dict[str, dict[str, An
return _RAPIDOCR_MODEL_REGISTRY.get(_normalize_model_key(ocr_version, lang_type))


def rapidocr_selection_requires_downloaded_models(
    *,
    ocr_version: str,
    lang_type: str,
) -> bool:
    """Tell whether a selection depends on explicitly downloaded model files.

    The (ocr_version, lang_type) pair is normalized into a registry key. The
    result is True only when that key differs from the bundled key and has an
    entry in the local model registry; in every other case it is False.
    """
    selection_key = _normalize_model_key(ocr_version, lang_type)
    if selection_key != _BUNDLED_KEY:
        # Non-bundled selections need downloads only if the registry knows them.
        return selection_key in _RAPIDOCR_MODEL_REGISTRY
    return False


def _registry_spec_for_model_type(
spec: dict[str, Any],
*,
Expand Down Expand Up @@ -238,6 +248,7 @@ def required_rapidocr_model_files(
ocr_version: str,
lang_type: str,
model_type: str = DEFAULT_RAPIDOCR_MODEL_TYPE,
plugin_id: str,
) -> list[dict[str, Any]]:
"""Files that must exist on disk for a given selection. Empty for the bundled combo."""
key = _normalize_model_key(ocr_version, lang_type)
Expand All @@ -246,7 +257,10 @@ def required_rapidocr_model_files(
registry = _RAPIDOCR_MODEL_REGISTRY.get(key)
if not registry:
return []
cache_dir = resolve_rapidocr_model_cache_dir(install_target_dir_raw)
cache_dir = resolve_rapidocr_model_cache_dir(
install_target_dir_raw,
plugin_id=plugin_id,
)
files: list[dict[str, Any]] = []
for kind in ("det", "rec", "cls"):
spec = registry.get(kind)
Expand All @@ -270,6 +284,7 @@ def missing_rapidocr_model_files(
ocr_version: str,
lang_type: str,
model_type: str = DEFAULT_RAPIDOCR_MODEL_TYPE,
plugin_id: str,
) -> list[dict[str, Any]]:
"""Required files that the resolver can't locate on disk.

Expand All @@ -289,11 +304,15 @@ def missing_rapidocr_model_files(
ocr_version=ocr_version,
lang_type=lang_type,
model_type=model_type,
plugin_id=plugin_id,
)
if not required:
return []

cache_dir = resolve_rapidocr_model_cache_dir(install_target_dir_raw)
cache_dir = resolve_rapidocr_model_cache_dir(
install_target_dir_raw,
plugin_id=plugin_id,
)
# Two possible `<package>/models/` dirs to scan:
# 1. The bundled-import path's models dir (find_spec → wheel models).
# 2. The legacy plugin-isolated install's package dir, which sits at
Expand All @@ -311,7 +330,7 @@ def missing_rapidocr_model_files(
except (ImportError, ValueError):
pass
from ._runtime import _rapidocr_package_dir
legacy_pkg = _rapidocr_package_dir(install_target_dir_raw)
legacy_pkg = _rapidocr_package_dir(install_target_dir_raw, plugin_id=plugin_id)
if legacy_pkg and legacy_pkg.exists():
candidate_package_dirs.append(legacy_pkg / "models")
if not candidate_package_dirs:
Expand Down
Loading
Loading