Skip to content

Commit d98b879

Browse files
[rust/rqd] Implement NIMBY logic (#1766)
Implemented NIMBY (Not In My BackYard) logic in the new Rust-based RQD, replicating the behavior of the original Python implementation. - Add NIMBY support to the Rust-based RQD system, preventing jobs from running when a user is actively using the machine. - Add new `rust/crates/rqd/src/system/nimby.rs` module to detect mouse/keyboard activity using `device_query` - NIMBY configurable via `rqd.yaml` (nimby_mode, idle_threshold, etc.) - Dynamically sets `DISPLAY` and `XAUTHORITY` from a file - Integrated with `MachineMonitor` to `lock/unlock` cores and update host state - Graceful shutdown support with `broadcast::channel` --------- Signed-off-by: Diego Tavares <[email protected]> Co-authored-by: Ramon Figueiredo <[email protected]>
1 parent 0e469f9 commit d98b879

File tree

13 files changed

+564
-47
lines changed

13 files changed

+564
-47
lines changed

.github/workflows/rust.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ jobs:
1515

1616
steps:
1717
- uses: actions/checkout@v4
18+
- name: Install X11 dev libs
19+
run: |
20+
sudo apt-get update && sudo apt-get install -y libx11-dev
1821
- name: Install Protoc
1922
uses: arduino/setup-protoc@v3
2023
- name: Build

rust/Cargo.lock

Lines changed: 73 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rust/config/rqd.fake_linux.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ grpc:
55
cuebot_endpoints: ["0.0.0.0:4343", "0.0.0.0:4343"]
66
connection_expires_after: 15m
77
machine:
8+
# nimby_mode: true
9+
# nimby_idle_threshold: 60s
810
worker_threads: 8
911
facility: test
1012
monitor_interval: 3s

rust/config/rqd.yaml

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ grpc:
3030

3131
# Cuebot server endpoints (can specify multiple for failover)
3232
# Default: ["localhost:4343"]
33-
cuebot_endpoints: ["cuebot.example.com:8082"] # Replace with actual endpoints
33+
cuebot_endpoints: ["cuebot.example.com:8082"] # Replace with actual endpoints
3434

3535
# How long gRPC connections should remain active before expiring
3636
# Default: 1h (3600 seconds)
@@ -69,6 +69,23 @@ machine:
6969
# Default: false
7070
# nimby_mode: false
7171

72+
# How long a system must be idle before triggering NIMBY lock
73+
# Default: 15 minutes
74+
# nimby_idle_threshold: 900s
75+
76+
# Path to a file containing the DISPLAY number (e.g., "username:0")
77+
# This file should contain a single line with the display number. Writing it
78+
# is not the responsibility of this module.
79+
# nimby_display_file_path: "/tmp/DISPLAY"
80+
81+
# How often to retry starting NIMBY mode if it fails
82+
# Default: 5 minutes
83+
# nimby_start_retry_interval: 300s
84+
85+
# Path to the Xauthority file
86+
# By default, this file is located in the user's home directory
87+
# nimby_display_xauthority_path: "/home/{username}/Xauthority"
88+
7289
# Custom tags to apply to this render node for job targeting
7390
# Default: []
7491
# custom_tags: ["gpu", "high_memory"]

rust/crates/rqd/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ tower-service = "0.3.3"
6161
http-body-util = "0.1.3"
6262
rand = "0.9.1"
6363
libc = "0.2"
64+
device_query = "3.0"
6465

6566
[dev-dependencies]
6667
tempfile = "3.14.0"

rust/crates/rqd/resources/openrqd.service

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ After=network.target
55

66
[Service]
77
Environment=OPENCUE_RQD_CONFIG=/etc/openrqd/rqd.yaml
8-
ExecStart=/usr/bin/openrqd
8+
ExecStart=/usr/local/bin/openrqd
99
LimitNOFILE=500000
1010
LimitNPROC=500000
1111
StandardOutput=journal+console

rust/crates/rqd/src/config/config.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,12 @@ pub struct MachineConfig {
7373
pub temp_path: String,
7474
pub core_multiplier: u32,
7575
pub worker_threads: usize,
76+
#[serde(with = "humantime_serde")]
77+
pub nimby_idle_threshold: Duration,
78+
pub nimby_display_file_path: Option<String>,
79+
#[serde(with = "humantime_serde")]
80+
pub nimby_start_retry_interval: Duration,
81+
pub nimby_display_xauthority_path: String,
7682
}
7783

7884
impl Default for MachineConfig {
@@ -91,6 +97,10 @@ impl Default for MachineConfig {
9197
temp_path: "/tmp".to_string(),
9298
core_multiplier: 100,
9399
worker_threads: 4,
100+
nimby_idle_threshold: Duration::from_secs(60 * 15), // 15 min
101+
nimby_display_file_path: None,
102+
nimby_start_retry_interval: Duration::from_secs(60 * 5), // 5 min
103+
nimby_display_xauthority_path: "/home/{username}/Xauthority".to_string(),
94104
}
95105
}
96106
}
@@ -123,6 +133,7 @@ pub struct RunnerConfig {
123133
pub enum LoggerType {
124134
#[serde(rename = "file")]
125135
File,
136+
// This is a placeholder for new logging solutions
126137
// #[serde(rename = "loki")]
127138
// Loki,
128139
}
@@ -247,7 +258,7 @@ impl Config {
247258
pub fn load_file_and_env<P: AsRef<str>>(path: P) -> Result<Self, RqdConfigError> {
248259
let config = ConfigBase::builder()
249260
.add_source(File::with_name(path.as_ref()))
250-
.add_source(Environment::with_prefix("VNPM").separator("_"))
261+
.add_source(Environment::with_prefix("RQD").separator("_"))
251262
.build();
252263

253264
config

rust/crates/rqd/src/system/linux.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -853,11 +853,6 @@ impl SystemManager for LinuxSystem {
853853
&self.attributes
854854
}
855855

856-
fn init_nimby(&self) -> Result<bool> {
857-
// TODO: missing implementation, returning dummy val
858-
Ok(false)
859-
}
860-
861856
fn collect_gpu_stats(&self) -> MachineGpuStats {
862857
// TODO: missing implementation, returning dummy val
863858
MachineGpuStats {

0 commit comments

Comments
 (0)