Skip to content

Commit b1def96

Browse files
committed
follow up main code
1 parent 8a163a7 commit b1def96

2 files changed

Lines changed: 28 additions & 8 deletions

File tree

src/ai/backend/agent/kernel_registry/loader/pickle.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,29 +15,44 @@
1515

1616

1717
class PickleBasedKernelRegistryLoader(AbstractKernelRegistryLoader[KernelRegistry]):
    """Load a pickled kernel registry from one of several candidate files.

    The loader is given three paths: the primary ("last") registry file, a
    fallback registry file, and a legacy registry file that is migrated onto
    the selected target before loading.
    """

    def __init__(
        self,
        last_registry_file_path: Path,
        fallback_registry_file_path: Path,
        legacy_registry_file_path: Path,
    ) -> None:
        """Remember the candidate registry file locations.

        Args:
            last_registry_file_path: Primary registry file; preferred when present.
            fallback_registry_file_path: File used when the primary one is
                missing but this one exists.
            legacy_registry_file_path: Old-location file that is moved onto the
                selected target before loading, if it exists.
        """
        self._last_registry_file_path = last_registry_file_path
        self._fallback_registry_file_path = fallback_registry_file_path
        self._legacy_registry_file_path = legacy_registry_file_path

    @override
    async def load_kernel_registry(self) -> KernelRegistry:
        """Load and return the unpickled kernel registry.

        The primary file is used unless it is missing and the fallback file
        exists. A legacy file, when present, is moved onto the selected
        target first (best effort: a failed move is logged, not raised).

        Raises:
            KernelRegistryLoadError: The selected file ended prematurely
                (``EOFError`` while unpickling).
            KernelRegistryNotFound: The selected file does not exist.
        """
        legacy_registry_file = self._legacy_registry_file_path
        fallback_registry_file = self._fallback_registry_file_path
        final_file_path = self._last_registry_file_path
        # Only fall back when there is actually something to fall back to.
        # Otherwise keep the primary path so that a migrated legacy file lands
        # at the primary location and no misleading warning is emitted when
        # neither file exists.
        if not final_file_path.is_file() and fallback_registry_file.is_file():
            log.warning(
                "Registry file with name {} not found. "
                "Falling back to path with local instance id: {}",
                final_file_path,
                fallback_registry_file,
            )
            final_file_path = fallback_registry_file
        try:
            if os.path.isfile(legacy_registry_file):
                shutil.move(legacy_registry_file, final_file_path)
        except Exception as e:
            # Migration is best effort; loading is still attempted below.
            log.warning(
                "Failed to move legacy kernel registry file {} to {} (err: {})",
                str(legacy_registry_file),
                str(final_file_path),
                str(e),
            )
        try:
            with open(final_file_path, "rb") as f:
                # NOTE(review): pickle.load executes arbitrary code from the
                # file; this is only safe if the registry file is written
                # exclusively by a trusted process - confirm.
                return pickle.load(f)
        except EOFError as e:
            log.warning("Failed to load the last kernel registry: {}", str(final_file_path))
            raise KernelRegistryLoadError from e
        except FileNotFoundError as e:
            raise KernelRegistryNotFound from e

src/ai/backend/agent/kernel_registry/recovery/pickle.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from dataclasses import dataclass
22
from pathlib import Path
33

4+
from ai.backend.common.types import AgentId
5+
46
from ...kernel import KernelRegistry
57
from ..loader.pickle import PickleBasedKernelRegistryLoader
68
from ..writer.pickle import PickleBasedKernelRegistryWriter
@@ -11,17 +13,20 @@
1113
class PickleBasedKernelRegistryRecoveryArgs:
    # Settings consumed by PickleBasedKernelRegistryRecovery to derive its
    # registry file paths.
    # NOTE(review): presumably decorated with @dataclass just above this
    # hunk - confirm.

    # Base directory under which the legacy registry file path is built.
    ipc_base_path: Path
    # Base directory under which the primary and fallback registry file
    # paths are built.
    var_base_path: Path
    # Interpolated into the primary registry file name.
    agent_id: AgentId
    # Interpolated into the fallback registry file name.
    local_instance_id: str
1518

1619

1720
class PickleBasedKernelRegistryRecovery:
1821
def __init__(self, args: PickleBasedKernelRegistryRecoveryArgs) -> None:
    """Create the pickle-based loader and writer from the given path settings.

    Three registry file paths are derived from *args*:

    * primary:  ``var_base_path / kernel_registry.{agent_id}.dat``
      (also the file the writer is pointed at),
    * fallback: ``var_base_path / kernel_registry.{local_instance_id}.dat``,
    * legacy:   ``ipc_base_path / kernel_registry.{local_instance_id}.dat``.
    """
    registry_file_name = f"kernel_registry.{args.agent_id}.dat"
    fallback_registry_file_name = f"kernel_registry.{args.local_instance_id}.dat"
    # Legacy files were named after the local instance id, so the legacy path
    # must keep that name; building it from the agent-id name would make the
    # loader look for a legacy file that was never written.
    legacy_registry_file_path = args.ipc_base_path / fallback_registry_file_name
    fallback_registry_file_path = args.var_base_path / fallback_registry_file_name
    last_registry_file_path = args.var_base_path / registry_file_name

    self._loader = PickleBasedKernelRegistryLoader(
        last_registry_file_path, fallback_registry_file_path, legacy_registry_file_path
    )
    # The writer always targets the primary (agent-id based) file.
    self._writer = PickleBasedKernelRegistryWriter(last_registry_file_path)
2732

0 commit comments

Comments
 (0)