22
33import asyncio
44import logging
5- import random
65from dataclasses import dataclass
76
87from rich import box
1817logger = logging .getLogger (__name__ )
1918__all__ = ["status" ]
2019
# ---------------------------------------------------------------------------
# Data layer – replace these with real implementations later
# ---------------------------------------------------------------------------

# Cluster names; every name listed here has an entry in both lookup tables
# below.
CLUSTERS = (
    "mila",
    "narval",
    "tamia",
    "rorqual",
    "fir",
    "nibi",
    "killarney",
    "vulcan",
    "trillium",
)

MOCK_DATA_SEED = 42  # deterministic seed so the display is reproducible
OFFLINE_PROBABILITY = 0.08  # ~8 % chance of a cluster being down/maintenance


# Rough GPU pool sizes per cluster (total GPUs available on the cluster).
_GPU_TOTALS: dict[str, int] = {
    "mila": 2048,
    "narval": 1024,
    "tamia": 512,
    "rorqual": 768,
    "fir": 640,
    "nibi": 256,
    "killarney": 384,
    "vulcan": 512,
    "trillium": 1280,
}

# Storage quota in GiB, as (home, scratch) per cluster.
_STORAGE_QUOTAS: dict[str, tuple[int, int]] = {
    "mila": (50, 5000),
    "narval": (50, 10000),
    "tamia": (100, 8000),
    "rorqual": (100, 12000),
    "fir": (50, 6000),
    "nibi": (50, 4000),
    "killarney": (100, 7500),
    "vulcan": (100, 9000),
    "trillium": (50, 15000),
}
66-
6720
6821@dataclass
6922class JobStats :
@@ -80,7 +33,6 @@ class JobStats:
8033@dataclass
8134class StorageStats :
8235 """Disk usage as (used_gib, quota_gib) for $HOME and $SCRATCH."""
83-
8436 home_used : float
8537 home_quota : float
8638 scratch_used : float
@@ -280,67 +232,6 @@ async def get_all_cluster_statuses(
280232 return statuses , True
281233
282234
def get_mock_cluster_status(username: str = "you") -> list[ClusterStatus]:
    """Return fake but plausible status data for every configured cluster.

    This function is intentionally free of any UI logic so it can be swapped
    out for a real implementation that queries Slurm / the cluster APIs.

    The generator is seeded with ``MOCK_DATA_SEED``, so for a given list of
    configured clusters the returned values are identical on every call.

    Args:
        username: Accepted for interface compatibility; the mock
            implementation does not read it.

    Returns:
        One ``ClusterStatus`` per entry of ``get_config().clusters``, in the
        same order.

    Raises:
        KeyError: If a configured cluster has no entry in ``_GPU_TOTALS`` or
            ``_STORAGE_QUOTAS``.
    """
    rng = random.Random(MOCK_DATA_SEED)

    gpu_models = ["A100", "H100", "V100", "A40", "RTX 8000"]

    results: list[ClusterStatus] = []
    # NOTE: the order of the rng calls below determines the generated values;
    # reordering them changes the (otherwise reproducible) mock output.
    for cluster in get_config().clusters:
        gpu_total = _GPU_TOTALS[cluster]
        # Simulate varying load – some clusters busier than others
        load_factor = rng.uniform(0.55, 0.98)
        gpu_busy = int(gpu_total * load_factor)
        gpu_idle = gpu_total - gpu_busy

        total_jobs = int(gpu_busy * rng.uniform(0.8, 1.4))
        pending = int(total_jobs * rng.uniform(0.1, 0.4))
        running = total_jobs - pending
        cancelled = int(total_jobs * rng.uniform(0.01, 0.05))
        completed = int(total_jobs * rng.uniform(0.5, 2.0))

        my_running = rng.randint(0, min(8, running))
        my_pending = rng.randint(0, min(4, pending))
        # Scale the user's completed jobs by their share of running jobs;
        # max(..., 1) guards against division by zero when nothing is running.
        my_completed = completed * my_running // max(running, 1)

        home_quota, scratch_quota = _STORAGE_QUOTAS[cluster]
        home_used = round(rng.uniform(5, home_quota * 0.90), 1)
        # The lower bound for scratch usage is the *home* quota, as in the
        # original mock data.
        scratch_used = round(rng.uniform(home_quota, scratch_quota * 0.95), 1)

        online = rng.random() > OFFLINE_PROBABILITY

        results.append(
            ClusterStatus(
                name=cluster,
                online=online,
                gpu_idle=gpu_idle,
                gpu_total=gpu_total,
                gpu_model=rng.choice(gpu_models),
                jobs=JobStats(
                    running=running,
                    pending=pending,
                    my_running=my_running,
                    my_pending=my_pending,
                    cancelled=cancelled,
                    completed=completed,
                    my_completed=my_completed,
                ),
                storage=StorageStats(
                    home_used=home_used,
                    home_quota=home_quota,
                    scratch_used=scratch_used,
                    scratch_quota=scratch_quota,
                ),
            )
        )
    return results
342-
343-
344235# ---------------------------------------------------------------------------
345236# UI helpers
346237# ---------------------------------------------------------------------------
@@ -376,27 +267,6 @@ def _gpu_bar(idle: int, total: int, width: int = 10) -> Text:
376267 return Text (f"{ bar_str } { idle :>5} /{ total } " , style = colour )
377268
378269
def _wait_text(minutes: int) -> Text:
    """Format an estimated wait time as a colour-coded ``Text``.

    Args:
        minutes: Estimated wait, in whole minutes.

    Returns:
        ``~<N>m`` in green when under 15 minutes, ``~<N>m`` in yellow when
        under an hour, otherwise ``~<H>h<MM>m`` in red.
    """
    if minutes < 15:
        return Text(f"~{minutes}m", style="green")
    if minutes < 60:
        return Text(f"~{minutes}m", style="yellow")
    # An hour or more: split into hours and zero-padded leftover minutes.
    hours, leftover = divmod(minutes, 60)
    return Text(f"~{hours}h{leftover:02d}m", style="red")
388-
389-
def _util_text(pct: float) -> Text:
    """Format a percentage as a colour-coded ``Text``.

    Args:
        pct: Percentage value; it is rendered with no decimal places.

    Returns:
        The rounded percentage followed by ``%``, styled green at 80 or
        above, yellow from 55 up to (but not including) 80, and red
        otherwise.
    """
    label = f"{pct:.0f}%"
    if pct >= 80:
        colour = "green"
    elif pct >= 55:
        colour = "yellow"
    else:
        colour = "red"
    return Text(label, style=colour)
398-
399-
400270# ---------------------------------------------------------------------------
401271# Main display
402272# ---------------------------------------------------------------------------
@@ -534,12 +404,8 @@ async def status(clusters: list[str] | None = None):
534404
535405 if not is_live :
536406 console .print (
537- "[yellow]No active cluster connections found. "
538- "Run [bold]cluv login[/bold] first, or showing mock data.[/yellow]\n "
407+ "[yellow]No active cluster connections found. Run [bold]cluv login[/bold] first.[/yellow]"
539408 )
540- mock = get_mock_cluster_status ()
541- # When specific clusters were requested, only show mock rows for those.
542- data = [c for c in mock if not clusters or c .name in clusters ]
543409
544410 console .print ()
545411 console .rule ("[bold cyan]cluv status[/bold cyan]" )
0 commit comments