Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .gemini/styleguide.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,26 @@ Every feature has a maintenance cost.

---

## Keep resource name resolution consistent across all usage paths.

Every configurable resource name (project, zone, cluster, namespace, etc.) must be resolvable through the same set of paths:

1. **Explicit parameter** to `@run()` (highest priority)
2. **Environment variable** (`KERAS_REMOTE_*`)
3. **CLI flag** (with Click's `envvar=` for automatic env var binding; within a CLI command, an explicitly passed flag takes precedence over the env var)
4. **Interactive prompt or sensible default** (lowest priority)

When adding a new configurable name:

- Add a parameter to `@run()` with env var fallback
- Add a `--flag` with `envvar=` to **every relevant CLI command** (not just `up` — also `down`, `status`, etc.)
- Add a row to `config show` so users can verify their configuration
- Ensure the env var fallback order is identical everywhere the name is resolved

This prevents confusing situations where a user sets an env var that works in one path but is silently ignored in another.

---

## Don't neglect error messages, docstrings, and documentation.

- **Catch user errors early.** Validate GCP project existence and quota before starting a long build.
Expand Down
20 changes: 15 additions & 5 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,22 @@ keras_remote/
- **Rules**: B, E, F, N, PYI, T20, TID, SIM, W, I, NPY
- **Dataclasses**: Frozen for immutable configs, mutable for state objects

### Environment Variables
### Environment Variables & Resource Name Resolution

- `KERAS_REMOTE_PROJECT` (required): GCP project ID
- `KERAS_REMOTE_ZONE` (optional): GCP zone, defaults to `us-central1-a`
- `KERAS_REMOTE_CLUSTER` (optional): GKE cluster name
- `KERAS_REMOTE_GKE_NAMESPACE` (optional): K8s namespace, defaults to `default`
Every configurable resource name must follow the same resolution model across all usage paths:

- **`@run()` decorator**: explicit parameter → env var → error or default
- **CLI commands**: `--flag` (with `envvar=`) → env var → interactive prompt or default
- **`config show`**: displays current value and source for every configurable name

| Env Var | `@run()` param | CLI flag | `config show` | Default |
| --- | --- | --- | --- | --- |
| `KERAS_REMOTE_PROJECT` | `project=` | `--project` | Yes | *(required)* |
| `KERAS_REMOTE_ZONE` | `zone=` | `--zone` | Yes | `us-central1-a` |
| `KERAS_REMOTE_CLUSTER` | `cluster=` | `--cluster-name` | Yes | `keras-remote-cluster` |
| `KERAS_REMOTE_GKE_NAMESPACE` | `namespace=` | *(runtime only)* | Yes | `default` |

When adding a new configurable resource name, ensure it is wired into **all three paths** (decorator, CLI flags on every relevant command, and `config show`). The `GOOGLE_CLOUD_PROJECT` env var is also accepted as a fallback for project ID (after `KERAS_REMOTE_PROJECT`).

### Testing

Expand Down
7 changes: 5 additions & 2 deletions keras_remote/backend/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,13 @@ def from_params(
if not zone:
zone = get_default_zone()
if not project:
project = os.environ.get("KERAS_REMOTE_PROJECT")
project = os.environ.get("KERAS_REMOTE_PROJECT") or os.environ.get(
"GOOGLE_CLOUD_PROJECT"
)
if not project:
raise ValueError(
"project must be specified or set KERAS_REMOTE_PROJECT environment variable"
"project must be specified or set KERAS_REMOTE_PROJECT"
" (or GOOGLE_CLOUD_PROJECT) environment variable"
)

return cls(
Expand Down
26 changes: 25 additions & 1 deletion keras_remote/backend/execution_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,32 @@ def test_from_params_resolves_zone_from_env(self):
self.assertEqual(ctx.zone, "asia-east1-c")
self.assertEqual(ctx.project, "env-proj")

def test_from_params_falls_back_to_google_cloud_project(self):
  """`GOOGLE_CLOUD_PROJECT` is used when no other project source is set.

  No explicit `project` is passed and `KERAS_REMOTE_PROJECT` is absent
  from the environment, so the resulting context must pick up the value
  of `GOOGLE_CLOUD_PROJECT`.
  """
  # Copy the current environment and drop both project variables so the
  # outcome does not depend on the machine running the test.
  patched_env = dict(os.environ)
  for name in ("KERAS_REMOTE_PROJECT", "GOOGLE_CLOUD_PROJECT"):
    patched_env.pop(name, None)
  patched_env["GOOGLE_CLOUD_PROJECT"] = "gc-proj"

  # clear=True makes os.environ exactly equal to patched_env for the
  # duration of the call.
  with mock.patch.dict(os.environ, patched_env, clear=True):
    ctx = JobContext.from_params(
      func=self._make_func(),
      args=(),
      kwargs={},
      accelerator="cpu",
      container_image=None,
      zone="us-central1-a",
      project=None,
      env_vars={},
    )
  self.assertEqual(ctx.project, "gc-proj")

def test_from_params_no_project_raises(self):
env = {k: v for k, v in os.environ.items() if k != "KERAS_REMOTE_PROJECT"}
env = {
k: v
for k, v in os.environ.items()
if k not in ("KERAS_REMOTE_PROJECT", "GOOGLE_CLOUD_PROJECT")
}
with (
mock.patch.dict(os.environ, env, clear=True),
self.assertRaisesRegex(ValueError, "project must be specified"),
Expand Down
9 changes: 9 additions & 0 deletions keras_remote/cli/commands/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ def show():
"KERAS_REMOTE_CLUSTER" if cluster else f"default ({DEFAULT_CLUSTER_NAME})",
)

# Namespace
namespace = os.environ.get("KERAS_REMOTE_GKE_NAMESPACE")
table.add_row(
"Namespace",
namespace or "default",
"KERAS_REMOTE_GKE_NAMESPACE" if namespace else "default (default)",
)

# State directory
state_dir = os.environ.get("KERAS_REMOTE_STATE_DIR")
table.add_row(
Expand All @@ -70,4 +78,5 @@ def show():
console.print(" export KERAS_REMOTE_PROJECT=my-project")
console.print(f" export KERAS_REMOTE_ZONE={DEFAULT_ZONE}")
console.print(" export KERAS_REMOTE_CLUSTER=keras-remote-cluster")
console.print(" export KERAS_REMOTE_GKE_NAMESPACE=my-namespace")
console.print()
13 changes: 10 additions & 3 deletions keras_remote/cli/commands/down.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pulumi.automation as auto

from keras_remote.cli.config import InfraConfig
from keras_remote.cli.constants import DEFAULT_ZONE
from keras_remote.cli.constants import DEFAULT_CLUSTER_NAME, DEFAULT_ZONE
from keras_remote.cli.infra.program import create_program
from keras_remote.cli.infra.stack_manager import get_stack
from keras_remote.cli.output import banner, console, success, warning
Expand All @@ -25,15 +25,22 @@
default=None,
help=(f"GCP zone [env: KERAS_REMOTE_ZONE, default: {DEFAULT_ZONE}]"),
)
@click.option(
"--cluster-name",
envvar="KERAS_REMOTE_CLUSTER",
default=None,
help="GKE cluster name [default: keras-remote-cluster]",
)
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation prompt")
def down(project, zone, yes):
def down(project, zone, cluster_name, yes):
"""Tear down keras-remote GCP infrastructure."""
banner("keras-remote Cleanup")

check_all()

project = project or resolve_project(allow_create=False)
zone = zone or DEFAULT_ZONE
cluster_name = cluster_name or DEFAULT_CLUSTER_NAME

# Warning
console.print()
Expand All @@ -51,7 +58,7 @@ def down(project, zone, yes):

console.print()

config = InfraConfig(project=project, zone=zone)
config = InfraConfig(project=project, zone=zone, cluster_name=cluster_name)

# Pulumi destroy
try:
Expand Down
13 changes: 10 additions & 3 deletions keras_remote/cli/commands/status.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pulumi.automation import CommandError

from keras_remote.cli.config import InfraConfig
from keras_remote.cli.constants import DEFAULT_ZONE
from keras_remote.cli.constants import DEFAULT_CLUSTER_NAME, DEFAULT_ZONE
from keras_remote.cli.infra.program import create_program
from keras_remote.cli.infra.stack_manager import get_stack
from keras_remote.cli.output import (
Expand All @@ -30,16 +30,23 @@
default=None,
help=(f"GCP zone [env: KERAS_REMOTE_ZONE, default: {DEFAULT_ZONE}]"),
)
def status(project, zone):
@click.option(
"--cluster-name",
envvar="KERAS_REMOTE_CLUSTER",
default=None,
help="GKE cluster name [default: keras-remote-cluster]",
)
def status(project, zone, cluster_name):
"""Show current keras-remote infrastructure state."""
banner("keras-remote Status")

check_all()

project = project or resolve_project()
zone = zone or DEFAULT_ZONE
cluster_name = cluster_name or DEFAULT_CLUSTER_NAME

config = InfraConfig(project=project, zone=zone)
config = InfraConfig(project=project, zone=zone, cluster_name=cluster_name)

try:
program = create_program(config)
Expand Down
Loading