Skip to content

Commit 9dc5570

Browse files
committed
fix(cloudsave): degrade when local state is unavailable
本次修复在启动前预检本机 state 目录;当 anchor/state/root_state 等本机状态路径不可用时,仅禁用本次会话云存档,不再阻断整个应用启动。 云存档 provider、bootstrap/import/export/upload/download、角色 tombstone 与 Workshop tombstone 链路都会跳过坏 state;存储位置写入/迁移动作在 local_state_unavailable 时返回 409,避免污染 root_state 或 migration checkpoint。 同时补充错误诊断、入口禁用、i18n、设计文档和单元回归覆盖,保持 cloudsave/state 不参与存储迁移。
1 parent 406a9ca commit 9dc5570

31 files changed

Lines changed: 1072 additions & 66 deletions

app/main_server.py

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ def _get_app_root():
7575
MaintenanceModeError,
7676
ROOT_MODE_NORMAL,
7777
bootstrap_local_cloudsave_environment,
78+
is_cloudsave_disabled,
7879
is_write_fence_active,
7980
maintenance_error_payload,
8081
set_root_mode,
@@ -2031,21 +2032,25 @@ async def _ensure_main_server_runtime_initialized(*, reason: str) -> bool:
20312032
return False
20322033

20332034
try:
2034-
bootstrap_local_cloudsave_environment(_config_manager)
2035-
import_result = None
2036-
try:
2037-
import_result = await _run_cloudsave_manager_action(
2038-
"import_if_needed",
2039-
reason="main_server_startup",
2040-
budget_seconds=10.0,
2041-
)
2042-
logger.info("Steam Auto-Cloud startup import: %s", import_result)
2043-
except CloudsaveDeadlineExceeded:
2044-
logger.warning(
2045-
"Steam Auto-Cloud startup import exceeded 10.0s budget before applying runtime changes; continuing with local runtime state"
2046-
)
2047-
except Exception as e:
2048-
logger.warning(f"Steam Auto-Cloud startup import failed: {e}")
2035+
if is_cloudsave_disabled():
2036+
logger.warning("Steam Auto-Cloud startup skipped because cloudsave is disabled for this session")
2037+
import_result = None
2038+
else:
2039+
bootstrap_local_cloudsave_environment(_config_manager)
2040+
import_result = None
2041+
try:
2042+
import_result = await _run_cloudsave_manager_action(
2043+
"import_if_needed",
2044+
reason="main_server_startup",
2045+
budget_seconds=10.0,
2046+
)
2047+
logger.info("Steam Auto-Cloud startup import: %s", import_result)
2048+
except CloudsaveDeadlineExceeded:
2049+
logger.warning(
2050+
"Steam Auto-Cloud startup import exceeded 10.0s budget before applying runtime changes; continuing with local runtime state"
2051+
)
2052+
except Exception as e:
2053+
logger.warning(f"Steam Auto-Cloud startup import failed: {e}")
20492054

20502055
await initialize_character_data()
20512056
await _sync_memory_server_after_startup_import(import_result)
@@ -2097,8 +2102,10 @@ async def _ensure_main_server_runtime_initialized(*, reason: str) -> bool:
20972102
except Exception as e:
20982103
logger.warning(f"全局语言初始化失败(不影响启动): {e}")
20992104

2100-
current_root_state = _config_manager.load_root_state()
2101-
if should_write_root_mode_normal_after_startup(current_root_state):
2105+
current_root_state = None if is_cloudsave_disabled() else _config_manager.load_root_state()
2106+
if current_root_state is None:
2107+
logger.warning("跳过 ROOT_MODE_NORMAL 写入:cloudsave 已为本次会话禁用")
2108+
elif should_write_root_mode_normal_after_startup(current_root_state):
21022109
try:
21032110
set_root_mode(
21042111
_config_manager,

app/memory_server.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
MaintenanceModeError,
8181
ROOT_MODE_NORMAL,
8282
bootstrap_local_cloudsave_environment,
83+
is_cloudsave_disabled,
8384
maintenance_error_payload,
8485
set_root_mode,
8586
should_write_root_mode_normal_after_startup,
@@ -2940,11 +2941,14 @@ async def ensure_memory_server_runtime_initialized(*, reason: str = "") -> bool:
29402941
return False
29412942

29422943
bootstrap_ok = False
2943-
try:
2944-
bootstrap_local_cloudsave_environment(_config_manager)
2945-
bootstrap_ok = True
2946-
except Exception as e:
2947-
logger.warning(f"[Memory] cloudsave 环境 bootstrap 失败,后续 cloudsave 相关操作可能降级: {e}")
2944+
if is_cloudsave_disabled():
2945+
logger.warning("[Memory] 跳过 cloudsave 环境 bootstrap:cloudsave 已为本次会话禁用")
2946+
else:
2947+
try:
2948+
bootstrap_local_cloudsave_environment(_config_manager)
2949+
bootstrap_ok = True
2950+
except Exception as e:
2951+
logger.warning(f"[Memory] cloudsave 环境 bootstrap 失败,后续 cloudsave 相关操作可能降级: {e}")
29482952

29492953
try:
29502954
from memory import migrate_to_character_dirs

docs/design/cloud-save-sync-optimization-plan.md

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,17 +85,31 @@
8585

8686
执行顺序:
8787

88-
1. `bootstrap_local_cloudsave_environment()`
89-
2. `CloudSaveManager.import_if_needed(reason="launcher_phase0_prelaunch_import")`
90-
3. root mode 切回 `normal`
91-
4. 发送 `cloudsave_bootstrap_ready` 事件。
88+
1. launcher 先在 fence 外校验本机 `state/` 目录可创建/可写。
89+
2. 进入 `cloud_apply_fence(mode="bootstrap_importing")`
90+
3. `bootstrap_local_cloudsave_environment()` 创建/校验 `state/` 与 `cloudsave/` 基础骨架,并执行 legacy/recovery 启动逻辑。
91+
4. `CloudSaveManager.import_if_needed(reason="launcher_phase0_prelaunch_import")`,真正应用快照仍在 fence 内执行。
92+
5. root mode 切回 `normal`
93+
6. 发送 `cloudsave_bootstrap_ready` 事件。
9294

9395
事件脱敏契约:`import_result` 只允许:
9496

9597
- `success`
9698
- `action`
9799
- `requested_reason`
98100

101+
若本机 `state/` 初始化失败(例如 `%LOCALAPPDATA%/N.E.K.O`、其 `state/` 或 state JSON 被同名文件/目录占用、不可写或被安全软件拦截),launcher 不得再尝试写入 `maintenance_readonly`,因为这会再次依赖同一个不可用的 `state/`。
102+
103+
当前降级口径:
104+
105+
- 设置 `NEKO_CLOUDSAVE_DISABLED=local_state_unavailable`,本次会话禁用 cloudsave bootstrap/import/export 与 write fence。
106+
- main_server / memory_server 继续按本地运行时真源启动,不自动应用 Steam `cloudsave/` 快照。
107+
- 云存档接口与页面应显示 provider unavailable / disabled,不再读取坏的本机 state。
108+
- 普通角色、配置、记忆等本地运行时写入不应被 cloudsave state 问题拖垮。
109+
- 存储位置只读状态接口可继续返回 disabled 诊断;需要写 `root_state` / migration checkpoint 的存储迁移动作必须拒绝执行,避免坏 `state/` 再污染迁移控制面。
110+
111+
关闭 Steam Cloud 不会影响该本机状态目录初始化;降级只是保证用户能先启动应用,仍应提示用户修复 `anchor_root` / `local_state_dir` / `failed_path` 指向的本机路径。
112+
99113
### 4.2 main_server 直启兜底
100114

101115
- startup 会再做一次 `bootstrap + import_if_needed(reason="main_server_startup")`

launcher.py

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,8 @@ def _maybe_reexec_into_project_venv(project_dir: str) -> None:
266266
set_port_probe_reuse,
267267
)
268268
from utils.cloudsave_runtime import (
269+
CLOUDSAVE_DISABLED_ENV,
270+
CLOUDSAVE_DISABLED_LOCAL_STATE_UNAVAILABLE,
269271
ROOT_MODE_BOOTSTRAP_IMPORTING,
270272
ROOT_MODE_MAINTENANCE_READONLY,
271273
ROOT_MODE_NORMAL,
@@ -1903,6 +1905,12 @@ def _prepare_cloudsave_runtime_for_launch() -> dict:
19031905
reset_config_manager_cache()
19041906
config_manager = get_config_manager(APP_NAME, migrate=False)
19051907

1908+
if not config_manager.ensure_local_state_directory():
1909+
diagnostic = getattr(config_manager, "_last_local_state_directory_error", None)
1910+
if diagnostic is not None:
1911+
raise diagnostic
1912+
raise OSError("failed to ensure local state directory")
1913+
19061914
with cloud_apply_fence(
19071915
config_manager,
19081916
mode=ROOT_MODE_BOOTSTRAP_IMPORTING,
@@ -1952,6 +1960,18 @@ def _prepare_cloudsave_runtime_for_launch() -> dict:
19521960
}
19531961

19541962

1963+
def _is_local_state_directory_error(exc) -> bool:
1964+
if bool(getattr(exc, "local_state_directory_error", False)):
1965+
return True
1966+
cause = getattr(exc, "__cause__", None)
1967+
if cause is not None and cause is not exc:
1968+
return _is_local_state_directory_error(cause)
1969+
context = getattr(exc, "__context__", None)
1970+
if context is not None and context is not exc:
1971+
return _is_local_state_directory_error(context)
1972+
return False
1973+
1974+
19551975
def main():
19561976
"""主函数"""
19571977
# 支持 multiprocessing 在 Windows 上的打包
@@ -1990,17 +2010,24 @@ def main():
19902010
try:
19912011
_prepare_cloudsave_runtime_for_launch()
19922012
except Exception as e:
1993-
try:
1994-
_config_manager = get_config_manager(APP_NAME)
1995-
set_root_mode(
1996-
_config_manager,
1997-
ROOT_MODE_MAINTENANCE_READONLY,
1998-
last_migration_result=f"launcher_phase0_bootstrap_failed:{e}",
1999-
)
2000-
except Exception:
2001-
pass
2002-
report_startup_failure(f"Startup failed: cloudsave bootstrap error: {e}")
2003-
return 1
2013+
if not _is_local_state_directory_error(e):
2014+
try:
2015+
_config_manager = get_config_manager(APP_NAME)
2016+
set_root_mode(
2017+
_config_manager,
2018+
ROOT_MODE_MAINTENANCE_READONLY,
2019+
last_migration_result=f"launcher_phase0_bootstrap_failed:{e}",
2020+
)
2021+
except Exception:
2022+
pass
2023+
report_startup_failure(f"Startup failed: cloudsave bootstrap error: {e}")
2024+
return 1
2025+
os.environ[CLOUDSAVE_DISABLED_ENV] = CLOUDSAVE_DISABLED_LOCAL_STATE_UNAVAILABLE
2026+
print(
2027+
"[Launcher] Cloudsave disabled for this session because local state is unavailable: "
2028+
f"{e}",
2029+
flush=True,
2030+
)
20042031

20052032
# 自动安装 Playwright Chromium(browser-use 依赖)
20062033
_ensure_playwright_browsers()

main_routers/characters_router.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@
121121
list_persona_presets,
122122
)
123123
from utils.url_utils import encode_url_path
124-
from utils.cloudsave_runtime import MaintenanceModeError, assert_cloudsave_writable
124+
from utils.cloudsave_runtime import MaintenanceModeError, assert_cloudsave_writable, is_cloudsave_disabled
125125
from config import (
126126
MEMORY_SERVER_PORT,
127127
TFLINK_UPLOAD_URL,
@@ -1494,6 +1494,9 @@ def _restore_snapshot_paths(records) -> None:
14941494

14951495

14961496
def _build_character_tombstones_state(config_manager, character_name: str) -> dict:
1497+
if is_cloudsave_disabled():
1498+
return config_manager.build_default_character_tombstones_state()
1499+
14971500
cloud_state = config_manager.load_cloudsave_local_state()
14981501
sequence_number = max(1, int(cloud_state.get("next_sequence_number") or 1))
14991502
tombstone_state = config_manager.load_character_tombstones_state()
@@ -3591,7 +3594,8 @@ async def delete_catgirl(name: str):
35913594
tombstone_snapshot = None
35923595
memory_server_reloaded = False
35933596
try:
3594-
tombstone_snapshot = copy.deepcopy(_config_manager.load_character_tombstones_state())
3597+
if not is_cloudsave_disabled():
3598+
tombstone_snapshot = copy.deepcopy(_config_manager.load_character_tombstones_state())
35953599

35963600
removed_memory_paths = await asyncio.to_thread(
35973601
delete_character_memory_storage, _config_manager, name
@@ -3605,10 +3609,11 @@ async def delete_catgirl(name: str):
36053609
if meta_path.exists():
36063610
await asyncio.to_thread(meta_path.unlink)
36073611

3608-
await asyncio.to_thread(
3609-
_config_manager.save_character_tombstones_state,
3610-
_build_character_tombstones_state(_config_manager, name),
3611-
)
3612+
if not is_cloudsave_disabled():
3613+
await asyncio.to_thread(
3614+
_config_manager.save_character_tombstones_state,
3615+
_build_character_tombstones_state(_config_manager, name),
3616+
)
36123617

36133618
# 删除角色配置
36143619
del characters['猫娘'][name]

main_routers/storage_location_router.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,13 @@
3333
get_request_app_shutdown,
3434
get_release_storage_startup_barrier,
3535
)
36-
from utils.cloudsave_runtime import ROOT_MODE_MAINTENANCE_READONLY, ROOT_MODE_NORMAL, set_root_mode
36+
from utils.cloudsave_runtime import (
37+
ROOT_MODE_MAINTENANCE_READONLY,
38+
ROOT_MODE_NORMAL,
39+
cloudsave_disabled_reason,
40+
is_cloudsave_disabled_due_to_local_state_unavailable,
41+
set_root_mode,
42+
)
3743
from utils.storage_location_bootstrap import (
3844
STORAGE_STARTUP_BLOCKING_REASONS,
3945
STORAGE_STATUS_POLL_INTERVAL_MS,
@@ -113,6 +119,19 @@ def _set_no_cache_headers(response: Response) -> None:
113119
response.headers["Expires"] = "0"
114120

115121

122+
def _reject_storage_mutation_when_cloudsave_disabled(response: Response) -> dict[str, Any] | None:
    """Build a 409 rejection when cloudsave is disabled because local state is unavailable.

    Args:
        response: The outgoing response; its ``status_code`` is set to 409
            only when the request is rejected.

    Returns:
        The error payload to return to the client, or ``None`` when the
        storage mutation may proceed.
    """
    if is_cloudsave_disabled_due_to_local_state_unavailable():
        # Set the status before assembling the payload, as the payload alone
        # does not carry the HTTP status.
        response.status_code = 409
        rejection_payload = {
            "ok": False,
            "error_code": "cloudsave_local_state_unavailable",
            "error": "本机状态目录不可用,当前会话已禁用云存档。请先修复本机 state 路径后重启应用,再进行存储位置变更。",
            "cloudsave_disabled": True,
            "cloudsave_disabled_reason": cloudsave_disabled_reason(),
        }
        return rejection_payload
    return None
133+
134+
116135
def _normalize_optional_path(value: Any) -> str:
117136
raw_value = str(value or "").strip()
118137
if not raw_value:
@@ -1081,6 +1100,10 @@ async def post_storage_location_exit(request: Request, response: Response):
10811100
"error": "缺少存储退出确认标记。",
10821101
}
10831102

1103+
disabled_response = _reject_storage_mutation_when_cloudsave_disabled(response)
1104+
if disabled_response is not None:
1105+
return disabled_response
1106+
10841107
config_manager = _get_storage_config_manager()
10851108
bootstrap_payload = build_storage_location_bootstrap_payload(config_manager)
10861109
blocking_reason = str(bootstrap_payload.get("blocking_reason") or "").strip()
@@ -1216,6 +1239,10 @@ async def _post_storage_location_retained_source_cleanup_locked(
12161239
):
12171240
_set_no_cache_headers(response)
12181241

1242+
disabled_response = _reject_storage_mutation_when_cloudsave_disabled(response)
1243+
if disabled_response is not None:
1244+
return disabled_response
1245+
12191246
config_manager = _get_storage_config_manager()
12201247
notice = _build_completed_migration_notice(
12211248
config_manager,
@@ -1300,6 +1327,10 @@ async def _post_storage_location_select_locked(
13001327
):
13011328
_set_no_cache_headers(response)
13021329

1330+
disabled_response = _reject_storage_mutation_when_cloudsave_disabled(response)
1331+
if disabled_response is not None:
1332+
return disabled_response
1333+
13031334
config_manager = _get_storage_config_manager()
13041335
current_root = normalize_runtime_root(config_manager.app_docs_dir)
13051336
anchor_root = compute_anchor_root(config_manager, current_root=current_root)
@@ -1508,6 +1539,10 @@ async def post_storage_location_preflight(
15081539
):
15091540
_set_no_cache_headers(response)
15101541

1542+
disabled_response = _reject_storage_mutation_when_cloudsave_disabled(response)
1543+
if disabled_response is not None:
1544+
return disabled_response
1545+
15111546
config_manager = _get_storage_config_manager()
15121547
current_root = normalize_runtime_root(config_manager.app_docs_dir)
15131548
anchor_root = compute_anchor_root(config_manager, current_root=current_root)
@@ -1587,6 +1622,10 @@ async def _post_storage_location_restart_locked(
15871622
):
15881623
_set_no_cache_headers(response)
15891624

1625+
disabled_response = _reject_storage_mutation_when_cloudsave_disabled(response)
1626+
if disabled_response is not None:
1627+
return disabled_response
1628+
15901629
config_manager = _get_storage_config_manager()
15911630
current_root = normalize_runtime_root(config_manager.app_docs_dir)
15921631
anchor_root = compute_anchor_root(config_manager, current_root=current_root)

main_routers/workshop_router.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from fastapi.responses import FileResponse, JSONResponse
3232

3333
from .shared_state import ensure_steamworks as get_steamworks, get_config_manager, get_initialize_character_data
34-
from utils.cloudsave_runtime import MaintenanceModeError, is_write_fence_active
34+
from utils.cloudsave_runtime import MaintenanceModeError, is_cloudsave_disabled, is_write_fence_active
3535
from utils.file_utils import atomic_write_json, atomic_write_json_async, read_json_async
3636
from utils.workshop_utils import (
3737
ensure_workshop_folder_exists,
@@ -236,6 +236,9 @@ def _read_first_line(path: str, encoding: str = 'utf-8') -> str:
236236

237237
def _load_deleted_character_names(config_mgr) -> set[str]:
238238
deleted_names: set[str] = set()
239+
if is_cloudsave_disabled():
240+
return deleted_names
241+
239242
try:
240243
tombstone_state = config_mgr.load_character_tombstones_state()
241244
except Exception as exc:
@@ -253,6 +256,9 @@ def _load_deleted_character_names(config_mgr) -> set[str]:
253256

254257
def _remove_deleted_character_tombstones(config_mgr, character_names: list[str]) -> list[str]:
255258
"""移除手动恢复角色对应的 tombstone,避免后续同步继续把它当作已删除。"""
259+
if is_cloudsave_disabled():
260+
return []
261+
256262
target_names = {str(name or "").strip() for name in character_names}
257263
target_names.discard("")
258264
if not target_names:
@@ -283,6 +289,15 @@ def _remove_deleted_character_tombstones(config_mgr, character_names: list[str])
283289
return removed_names
284290

285291

292+
def _write_deleted_character_tombstone(config_mgr, character_name: str, build_tombstone_state) -> bool:
    """Persist a tombstone for a deleted character unless cloudsave is disabled.

    Args:
        config_mgr: Config manager exposing ``save_character_tombstones_state``.
        character_name: Name of the character the tombstone is written for.
        build_tombstone_state: Callable invoked as
            ``build_tombstone_state(config_mgr, character_name)`` to produce
            the state that is saved.

    Returns:
        True when the tombstone state was built and saved, False when the
        write was skipped because cloudsave is disabled.
    """
    if not is_cloudsave_disabled():
        new_state = build_tombstone_state(config_mgr, character_name)
        config_mgr.save_character_tombstones_state(new_state)
        return True
    # Cloudsave is disabled: do not build or save any tombstone state.
    return False
299+
300+
286301
def _derive_workshop_origin_display_name(raw_model_name: str, fallback_name: str) -> str:
287302
normalized_name = str(raw_model_name or "").strip().replace("\\", "/")
288303
if not normalized_name:
@@ -3382,9 +3397,11 @@ async def _delete_memory_with_retry(name: str) -> list:
33823397
)
33833398

33843399
async def _write_tombstone(name: str) -> None:
3385-
tombstone_state = _build_character_tombstones_state(config_mgr, name)
33863400
await asyncio.to_thread(
3387-
config_mgr.save_character_tombstones_state, tombstone_state
3401+
_write_deleted_character_tombstone,
3402+
config_mgr,
3403+
name,
3404+
_build_character_tombstones_state,
33883405
)
33893406

33903407
async def _remove_one(name: str) -> None:

0 commit comments

Comments
 (0)