Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions python/examples/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,18 @@ def prompt_endpoint() -> str:

with lupine.connect(host=prompt_endpoint()) as session:
device = session.device()
props = torch.cuda.get_device_properties(device)
info = getattr(session, "info", None)
x = torch.arange(8, device=device, dtype=torch.float32)
y = (x * 2).cpu()
print("cuda available:", torch.cuda.is_available())
if info is None:
props = torch.cuda.get_device_properties(device)
info = {
"cuda_available": torch.cuda.is_available(),
"device_count": torch.cuda.device_count(),
"gpu": props.name,
}
print("cuda available:", info["cuda_available"])
print("device:", device)
print("count:", torch.cuda.device_count())
print("gpu:", props.name)
print("count:", info["device_count"])
print("gpu:", info["gpu"])
print("result:", y.tolist())
28 changes: 26 additions & 2 deletions python/lupine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import os
import ctypes
import sys
from collections.abc import Sequence
from dataclasses import dataclass
from pathlib import Path
Expand Down Expand Up @@ -34,6 +35,13 @@ def _cuda_initialized() -> bool:
return False


def _has_native_cuda_backend() -> bool:
    """Report whether the PyTorch module from ``_torch()`` has a CUDA build.

    Returns:
        ``True`` when ``version.cuda`` on the module returned by ``_torch()``
        is not ``None``; ``False`` when it is ``None`` or when the lookup
        raises ``LupineError``.
    """
    try:
        torch_module = _torch()
        cuda_version = torch_module.version.cuda
    except LupineError:
        return False
    return cuda_version is not None


def _require_mutable_config() -> None:
if _cuda_initialized():
raise LupineError("connect to LUPINE before PyTorch initializes CUDA")
Expand Down Expand Up @@ -170,18 +178,34 @@ def connect(
port: int | None = None,
require_available: bool = False,
libcuda: str | os.PathLike[str] | None = None,
) -> Session:
) -> Any:
"""Create a LUPINE session for one or more remote GPU hosts.

Use the session before any PyTorch CUDA operation:

``with lupine.connect(host=["a:14833", "b:14833"]) as s:``

``s.devices()`` then returns ``[torch.device("cuda:0"), torch.device("cuda:1")]``.

On macOS with a CPU-only PyTorch build, ``connect()`` automatically returns
a sidecar session backed by Apple's container runtime.
"""

servers = _normalize_hosts(host, port)
if not _has_native_cuda_backend():
if sys.platform != "darwin":
raise LupineError(
"PyTorch is not compiled with CUDA and automatic LUPINE sidecar "
"fallback is only supported on macOS."
)
if len(servers) != 1:
raise LupineError("automatic LUPINE sidecar fallback supports one host")
if libcuda is not None:
raise LupineError("libcuda is only supported with native CUDA PyTorch")
return sidecar(server=servers[0])

return Session(
servers=_normalize_hosts(host, port),
servers=servers,
require_available=require_available,
libcuda=libcuda,
)
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "lupine"
version = "0.1.1"
version = "0.1.2"
description = "Small PyTorch adapter helpers for LUPINE-backed CUDA devices"
readme = "README.md"
requires-python = ">=3.9"
Expand Down
45 changes: 45 additions & 0 deletions python/tests/test_lupine_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class FakeTorch(types.SimpleNamespace):
def __init__(self):
    """Build the fake torch namespace with ``cuda`` and ``version`` attributes."""
    fake_cuda = FakeCuda()
    # ``version.cuda`` is a non-None placeholder so the fake reads as a CUDA build.
    fake_version = types.SimpleNamespace(cuda="fake-cuda")
    super().__init__(cuda=fake_cuda, version=fake_version)

def device(self, kind, index=None):
    """Return a ``FakeDevice`` built from ``kind`` and the optional ``index``."""
    fake_device = FakeDevice(kind, index)
    return fake_device
Expand Down Expand Up @@ -96,6 +97,50 @@ def test_connect_accepts_multiple_hosts_in_order(lupine_module):
assert session.device(1) == FakeDevice("cuda", 1)


def test_connect_uses_sidecar_when_torch_has_no_cuda_backend(lupine_module, monkeypatch):
    """With no CUDA build on darwin, ``connect()`` returns what ``sidecar()`` returns."""
    lupine, fake_torch = lupine_module
    fake_torch.version.cuda = None
    sidecar_session = object()
    recorded_kwargs = []

    def fake_sidecar(**kwargs):
        # Record the keyword arguments so the forwarded server can be checked.
        recorded_kwargs.append(kwargs)
        return sidecar_session

    monkeypatch.setattr(lupine.sys, "platform", "darwin")
    monkeypatch.setattr(lupine, "sidecar", fake_sidecar)

    result = lupine.connect(host="host-a")

    assert result is sidecar_session
    assert recorded_kwargs == [{"server": "host-a:14833"}]


def test_connect_sidecar_fallback_rejects_multiple_hosts(lupine_module, monkeypatch):
    """The sidecar fallback raises ``LupineError`` when given two hosts."""
    lupine, fake_torch = lupine_module
    monkeypatch.setattr(lupine.sys, "platform", "darwin")
    fake_torch.version.cuda = None
    hosts = ["host-a:14833", "host-b:14833"]

    with pytest.raises(lupine.LupineError, match="supports one host"):
        lupine.connect(host=hosts)


def test_connect_sidecar_fallback_rejects_libcuda(lupine_module, monkeypatch, tmp_path):
    """The sidecar fallback raises ``LupineError`` when ``libcuda`` is supplied."""
    lupine, fake_torch = lupine_module
    monkeypatch.setattr(lupine.sys, "platform", "darwin")
    fake_torch.version.cuda = None
    libcuda_path = tmp_path / "libcuda.so.1"

    with pytest.raises(lupine.LupineError, match="libcuda"):
        lupine.connect(host="host-a", libcuda=libcuda_path)


def test_connect_requires_cuda_backend_off_macos(lupine_module, monkeypatch):
    """With no CUDA build on a non-darwin platform, ``connect()`` raises ``LupineError``."""
    lupine, fake_torch = lupine_module
    monkeypatch.setattr(lupine.sys, "platform", "linux")
    fake_torch.version.cuda = None
    expected_error = pytest.raises(lupine.LupineError, match="automatic LUPINE sidecar")

    with expected_error:
        lupine.connect(host="host-a")


def test_connect_restores_env_when_cuda_was_not_initialized(lupine_module, monkeypatch):
lupine, _ = lupine_module

Expand Down
Loading