Skip to content

Commit 1777ac7

Browse files
cleanup
1 parent 57a3604 commit 1777ac7

File tree

12 files changed

+29
-288
lines changed

12 files changed

+29
-288
lines changed

AGENTS.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ keras_remote/
2525
```python
2626
@keras_remote.run() called
2727
→ JobContext.from_params() # Resolve config from args/env vars
28-
→ ensure_credentials() # Verify/auto-configure gcloud, ADC, kubeconfig, Docker auth
28+
→ ensure_credentials() # Verify/auto-configure gcloud, ADC, kubeconfig
2929
→ _prepare_artifacts() # Serialize function (cloudpickle), zip working dir
3030
→ _build_container() # Build or retrieve cached Docker image
3131
→ _upload_artifacts() # Upload payload.pkl, context.zip to GCS
@@ -41,7 +41,7 @@ keras_remote/
4141
| ---------------------------- | -------------------------------------------------------------------------------- |
4242
| `core/core.py` | `@run()` decorator, backend routing, env var capture |
4343
| `core/accelerators.py` | Accelerator registry (`GPUS`, `TPUS`), parser (`parse_accelerator`) |
44-
| `credentials.py` | Credential verification & auto-setup (gcloud, ADC, kubeconfig, Docker auth) |
44+
| `credentials.py` | Credential verification & auto-setup (gcloud, ADC, kubeconfig) |
4545
| `backend/execution.py` | `JobContext` dataclass, `BaseK8sBackend` base class, `execute_remote()` pipeline |
4646
| `backend/gke_client.py` | K8s Job creation, status polling, pod log retrieval |
4747
| `backend/pathways_client.py` | LeaderWorkerSet creation for multi-host TPUs |

keras_remote/backend/execution_test.py

Lines changed: 0 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -193,34 +193,6 @@ def test_cleanup_on_wait_failure(self):
193193
# cleanup_job is called in finally block even when wait fails
194194
backend.cleanup_job.assert_called_once()
195195

196-
def test_ensure_credentials_called_with_correct_args(self):
197-
with (
198-
mock.patch(
199-
"keras_remote.backend.execution.ensure_credentials"
200-
) as mock_creds,
201-
mock.patch("keras_remote.backend.execution._build_container"),
202-
mock.patch("keras_remote.backend.execution._upload_artifacts"),
203-
mock.patch(
204-
"keras_remote.backend.execution._download_result",
205-
return_value={"success": True, "result": 0},
206-
),
207-
mock.patch(
208-
"keras_remote.backend.execution._cleanup_and_return",
209-
return_value=0,
210-
),
211-
):
212-
ctx = self._make_ctx()
213-
backend = MagicMock()
214-
backend.cluster = "test-cluster"
215-
216-
execute_remote(ctx, backend)
217-
218-
mock_creds.assert_called_once_with(
219-
project="proj",
220-
zone="us-central1-a",
221-
cluster="test-cluster",
222-
)
223-
224196

225197
if __name__ == "__main__":
226198
absltest.main()

keras_remote/cli/commands/up.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
from keras_remote.cli.config import InfraConfig
99
from keras_remote.cli.constants import DEFAULT_CLUSTER_NAME, DEFAULT_ZONE
1010
from keras_remote.cli.infra.post_deploy import (
11-
configure_docker_auth,
1211
configure_kubectl,
1312
install_gpu_drivers,
1413
install_lws,
@@ -24,7 +23,6 @@
2423
)
2524
from keras_remote.cli.prerequisites_check import check_all
2625
from keras_remote.cli.prompts import prompt_accelerator, resolve_project
27-
from keras_remote.constants import zone_to_ar_location
2826
from keras_remote.core import accelerators
2927
from keras_remote.core.accelerators import GpuConfig
3028

@@ -113,11 +111,9 @@ def up(project, zone, accelerator, cluster_name, yes):
113111
)
114112

115113
# Post-deploy steps
116-
ar_location = zone_to_ar_location(zone)
117114
console.print("\n[bold]Running post-deploy configuration...[/bold]\n")
118115

119116
steps = [
120-
("Docker authentication", lambda: configure_docker_auth(ar_location)),
121117
(
122118
"kubectl configuration",
123119
lambda: configure_kubectl(

keras_remote/cli/commands/up_test.py

Lines changed: 0 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -27,9 +27,6 @@
2727
"keras_remote.cli.commands.up.create_program",
2828
),
2929
"get_stack": mock.patch("keras_remote.cli.commands.up.get_stack"),
30-
"configure_docker_auth": mock.patch(
31-
"keras_remote.cli.commands.up.configure_docker_auth",
32-
),
3330
"configure_kubectl": mock.patch(
3431
"keras_remote.cli.commands.up.configure_kubectl",
3532
),
@@ -68,7 +65,6 @@ def test_full_success(self):
6865
self.assertIn("Setup Complete", result.output)
6966
self.assertNotIn("Warnings", result.output)
7067
self.mocks["install_lws"].assert_called_once()
71-
self.mocks["configure_docker_auth"].assert_called_once()
7268
self.mocks["configure_kubectl"].assert_called_once()
7369
self.mocks[
7470
"install_gpu_drivers"
@@ -83,27 +79,11 @@ def test_pulumi_failure_still_runs_post_deploy(self):
8379
result = self.runner.invoke(up, _CLI_ARGS)
8480

8581
self.assertEqual(result.exit_code, 0, result.output)
86-
self.mocks["configure_docker_auth"].assert_called_once()
8782
self.mocks["configure_kubectl"].assert_called_once()
8883
self.mocks["install_lws"].assert_called_once()
8984
self.assertIn("Setup Completed With Warnings", result.output)
9085
self.assertIn("Pulumi provisioning encountered errors", result.output)
9186

92-
def test_post_deploy_failure_does_not_block_others(self):
93-
"""One post-deploy step failing doesn't prevent the others from running."""
94-
self.mocks[
95-
"configure_docker_auth"
96-
].side_effect = subprocess.CalledProcessError(1, "gcloud")
97-
98-
result = self.runner.invoke(up, _CLI_ARGS)
99-
100-
self.assertEqual(result.exit_code, 0, result.output)
101-
# Subsequent steps still called despite Docker auth failure.
102-
self.mocks["configure_kubectl"].assert_called_once()
103-
self.mocks["install_lws"].assert_called_once()
104-
self.assertIn("Setup Completed With Warnings", result.output)
105-
self.assertIn("Docker authentication", result.output)
106-
10787
def test_multiple_post_deploy_failures(self):
10888
"""Multiple post-deploy failures are all reported."""
10989
self.mocks["configure_kubectl"].side_effect = subprocess.CalledProcessError(

keras_remote/cli/constants.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,12 @@
22

33
import os
44

5-
from keras_remote.constants import DEFAULT_ZONE # noqa: F401 — re-exported
5+
from keras_remote.constants import (
6+
DEFAULT_CLUSTER_NAME, # noqa: F401 — re-exported
7+
DEFAULT_ZONE, # noqa: F401 — re-exported
8+
)
69

710
RESOURCE_NAME_PREFIX = "keras-remote"
8-
DEFAULT_CLUSTER_NAME = f"{RESOURCE_NAME_PREFIX}-cluster"
911
STATE_DIR = os.environ.get(
1012
"KERAS_REMOTE_STATE_DIR",
1113
os.path.expanduser("~/.keras-remote/pulumi"),

keras_remote/cli/infra/post_deploy.py

Lines changed: 2 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
"""Post-deploy steps that cannot be managed by Pulumi.
22
3-
These operations configure local machine state (Docker auth, kubectl)
4-
or apply Kubernetes manifests that depend on the cluster being ready.
3+
These operations configure local machine state (kubectl) or apply
4+
Kubernetes manifests that depend on the cluster being ready.
55
"""
66

77
import os
@@ -13,24 +13,6 @@
1313
)
1414

1515

16-
def configure_docker_auth(ar_location):
17-
"""Configure Docker to authenticate with Artifact Registry.
18-
19-
Args:
20-
ar_location: Multi-region location (e.g., "us", "europe", "asia").
21-
"""
22-
subprocess.run(
23-
[
24-
"gcloud",
25-
"auth",
26-
"configure-docker",
27-
f"{ar_location}-docker.pkg.dev",
28-
"--quiet",
29-
],
30-
check=True,
31-
)
32-
33-
3416
def configure_kubectl(cluster_name, zone, project):
3517
"""Configure kubectl to access the GKE cluster.
3618

keras_remote/cli/prerequisites_check.py

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22
33
Delegates common credential checks (gcloud, auth plugin, ADC) to
44
:mod:`keras_remote.credentials` and converts ``RuntimeError`` into
5-
``click.ClickException``. CLI-only tool checks (Pulumi, kubectl, Docker)
6-
remain here.
5+
``click.ClickException``. CLI-only tool checks (Pulumi, kubectl) remain
6+
here.
77
"""
88

99
import shutil
@@ -37,14 +37,6 @@ def check_kubectl():
3737
)
3838

3939

40-
def check_docker():
41-
"""Verify Docker CLI is installed."""
42-
if not shutil.which("docker"):
43-
raise click.ClickException(
44-
"Docker not found. Install from: https://docs.docker.com/get-docker/"
45-
)
46-
47-
4840
def check_gke_auth_plugin():
4941
"""Verify gke-gcloud-auth-plugin is installed; auto-install if missing."""
5042
try:
@@ -67,5 +59,4 @@ def check_all():
6759
check_pulumi()
6860
check_kubectl()
6961
check_gke_auth_plugin()
70-
check_docker()
7162
check_gcloud_auth()

keras_remote/cli/prerequisites_check_test.py

Lines changed: 1 addition & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@
66
from absl.testing import absltest, parameterized
77

88
from keras_remote.cli.prerequisites_check import (
9-
check_docker,
109
check_gcloud,
1110
check_gcloud_auth,
1211
check_gke_auth_plugin,
@@ -18,7 +17,7 @@
1817

1918

2019
class TestToolChecks(parameterized.TestCase):
21-
"""Tests for CLI-only tool checks (pulumi, kubectl, docker)."""
20+
"""Tests for CLI-only tool checks (pulumi, kubectl)."""
2221

2322
@parameterized.named_parameters(
2423
dict(
@@ -31,11 +30,6 @@ class TestToolChecks(parameterized.TestCase):
3130
check_fn=check_kubectl,
3231
error_match="kubectl not found",
3332
),
34-
dict(
35-
testcase_name="docker",
36-
check_fn=check_docker,
37-
error_match="Docker not found",
38-
),
3933
)
4034
def test_present(self, check_fn, error_match):
4135
with mock.patch("shutil.which", return_value="/usr/bin/tool"):
@@ -52,11 +46,6 @@ def test_present(self, check_fn, error_match):
5246
check_fn=check_kubectl,
5347
error_match="kubectl not found",
5448
),
55-
dict(
56-
testcase_name="docker",
57-
check_fn=check_docker,
58-
error_match="Docker not found",
59-
),
6049
)
6150
def test_missing(self, check_fn, error_match):
6251
with (

keras_remote/constants.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
ZONE_ENV_VAR = "KERAS_REMOTE_ZONE"
66
DEFAULT_ZONE = "us-central1-a"
7+
DEFAULT_CLUSTER_NAME = "keras-remote-cluster"
78
DEFAULT_REGION = DEFAULT_ZONE.rsplit("-", 1)[0] # "us-central1"
89

910

keras_remote/core/core.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
PathwaysBackend,
88
execute_remote,
99
)
10+
from keras_remote.constants import DEFAULT_CLUSTER_NAME
1011
from keras_remote.core import accelerators
1112

1213

@@ -117,7 +118,7 @@ def _execute_on_gke(
117118
"""Execute function on GKE cluster with GPU/TPU nodes."""
118119
# Get GKE-specific defaults
119120
if not cluster:
120-
cluster = os.environ.get("KERAS_REMOTE_CLUSTER")
121+
cluster = os.environ.get("KERAS_REMOTE_CLUSTER", DEFAULT_CLUSTER_NAME)
121122
if not namespace:
122123
namespace = os.environ.get("KERAS_REMOTE_GKE_NAMESPACE", "default")
123124

@@ -141,7 +142,7 @@ def _execute_on_pathways(
141142
):
142143
"""Execute function on GKE cluster via ML Pathways."""
143144
if not cluster:
144-
cluster = os.environ.get("KERAS_REMOTE_CLUSTER")
145+
cluster = os.environ.get("KERAS_REMOTE_CLUSTER", DEFAULT_CLUSTER_NAME)
145146
if not namespace:
146147
namespace = os.environ.get("KERAS_REMOTE_GKE_NAMESPACE", "default")
147148

0 commit comments

Comments
 (0)