Skip to content

Commit 1b5597b

Browse files
authored
lighthouse: add dashboard (#7)
1 parent 90ab99e commit 1b5597b

File tree

4 files changed

+153
-3
lines changed

4 files changed

+153
-3
lines changed

Cargo.toml

+3
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ edition = "2021"
55

66
[dependencies]
77
anyhow = "1.0.89"
8+
askama = "0.12.1"
9+
axum = "0.7.7"
10+
gethostname = "0.5.0"
811
log = "0.4.22"
912
prost = "0.13.3"
1013
pyo3 = {version="0.22.3", features = ["extension-module"]}

src/lighthouse.rs

+55-3
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,23 @@
44
// This source code is licensed under the BSD-style license found in the
55
// LICENSE file in the root directory of this source tree.
66

7+
use core::net::SocketAddr;
78
use std::collections::HashMap;
89
use std::sync::Arc;
910
use std::time::Duration;
1011
use std::time::Instant;
1112

1213
use anyhow::Result;
14+
use askama::Template;
15+
use axum::{response::Html, routing::get, Router};
16+
use gethostname::gethostname;
1317
use log::{error, info};
1418
use structopt::StructOpt;
1519
use tokio::sync::broadcast;
1620
use tokio::sync::Mutex;
1721
use tokio::task::JoinSet;
1822
use tokio::time::sleep;
23+
use tonic::service::Routes;
1924
use tonic::transport::Server;
2025
use tonic::{Request, Response, Status};
2126

@@ -191,11 +196,33 @@ impl Lighthouse {
191196
}
192197

193198
async fn _run_grpc(self: Arc<Self>) -> Result<()> {
194-
let bind = self.opt.bind.parse()?;
195-
info!("Lighthouse listening on {}", bind);
199+
let bind: SocketAddr = self.opt.bind.parse()?;
200+
info!(
201+
"Lighthouse listening on: http://{}:{}",
202+
gethostname().into_string().unwrap(),
203+
bind.port()
204+
);
205+
206+
let self_clone = self.clone();
207+
208+
// Setup HTTP endpoints
209+
let app = Router::new()
210+
.route(
211+
"/",
212+
get(|| async { Html(IndexTemplate {}.render().unwrap()) }),
213+
)
214+
.route(
215+
"/status",
216+
get(move || async { self_clone.get_status().await }),
217+
);
218+
219+
// register the GRPC service
220+
let routes = Routes::from(app).add_service(LighthouseServiceServer::new(self));
196221

197222
Server::builder()
198-
.add_service(LighthouseServiceServer::new(self))
223+
// allow non-GRPC connections
224+
.accept_http1(true)
225+
.add_routes(routes)
199226
.serve(bind)
200227
.await
201228
.map_err(|e| e.into())
@@ -213,6 +240,19 @@ impl Lighthouse {
213240
}
214241
Ok(())
215242
}
243+
244+
/// Renders the HTML fragment for the `/status` dashboard endpoint.
///
/// Takes a snapshot of the lighthouse state (`quorum_id`, `prev_quorum`,
/// and `heartbeats`) and feeds it to `StatusTemplate`.
///
/// # Panics
///
/// Panics if `render()` returns an error, because the result is unwrapped.
async fn get_status(self: Arc<Self>) -> Html<String> {
245+
// The inner block limits the lifetime of the `state` lock guard: the
// fields are copied out and the guard is dropped before rendering.
let template = {
246+
let state = self.state.lock().await;
247+
248+
StatusTemplate {
249+
quorum_id: state.quorum_id,
250+
prev_quorum: state.prev_quorum.clone(),
251+
heartbeats: state.heartbeats.clone(),
252+
}
253+
};
254+
Html(template.render().unwrap())
255+
}
216256
}
217257

218258
#[tonic::async_trait]
@@ -271,6 +311,18 @@ impl LighthouseService for Arc<Lighthouse> {
271311
}
272312
}
273313

314+
/// Askama template for the dashboard landing page (`index.html`).
///
/// Carries no data: the page itself is static and fetches `/status`
/// to fill in its content.
#[derive(Template)]
315+
#[template(path = "index.html")]
316+
struct IndexTemplate {}
317+
318+
/// Askama template for the status fragment (`status.html`).
///
/// Holds the values copied out of the lighthouse state by `get_status`.
#[derive(Template)]
319+
#[template(path = "status.html")]
320+
struct StatusTemplate {
321+
// Previous quorum, if any; the template only renders its id and
// participants when this is `Some`.
prev_quorum: Option<Quorum>,
322+
// Shown in the template as "Current quorum_id".
quorum_id: i64,
323+
// Keyed by replica id; the template shows each `Instant`'s elapsed
// time in seconds as the heartbeat age.
heartbeats: HashMap<String, Instant>,
324+
}
325+
274326
#[cfg(test)]
275327
mod tests {
276328
use super::*;

templates/index.html

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
<head>
2+
<title>Lighthouse Dashboard - torchft</title>
3+
<link rel="shortcut icon" type="image/x-icon" href="https://pytorch.org/favicon.ico?">
4+
<style>
5+
body {
6+
margin: 0;
7+
font-family: -apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";
8+
font-size: 1rem;
9+
font-weight: 400;
10+
line-height: 1.5;
11+
color: #212529;
12+
text-align: left;
13+
background-color: #fff;
14+
}
15+
h1, h2, h3, h4, h5, h6, .h1, .h2, .h3, .h4, .h5, .h6 {
16+
margin-bottom: .5rem;
17+
font-weight: 500;
18+
line-height: 1.2;
19+
}
20+
header {
21+
background-color: rgba(0, 0, 0, 0.17);
22+
padding: 10px;
23+
display: flex;
24+
align-items: center;
25+
padding: 16px;
26+
justify-content: space-between;
27+
}
28+
header h1 {
29+
display: inline-block;
30+
margin: 0;
31+
}
32+
section {
33+
max-width: 1280px;
34+
padding: 16px;
35+
margin: 0 auto;
36+
}
37+
.member {
38+
display: inline-block;
39+
margin: 10px;
40+
padding: 10px;
41+
border: 1px solid #333;
42+
}
43+
.heartbeat.old {
44+
color: red;
45+
}
46+
</style>
47+
<script src="https://unpkg.com/htmx.org@2.0.3"></script>
48+
</head>
49+
50+
<header>
51+
<h1>Lighthouse Dashboard - torchft</h1>
52+
<img src="https://pytorch.org/assets/images/logo.svg" width="128"/>
53+
</header>
54+
55+
<section hx-get="/status" hx-trigger="load, every 1s">
56+
Loading...
57+
</section>

templates/status.html

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<h2>Quorum Status</h2>
2+
3+
Current quorum_id: {{quorum_id}}
4+
5+
<h3>Previous Quorum</h3>
6+
{% if let Some(prev_quorum) = prev_quorum %}
7+
8+
Previous quorum id: {{prev_quorum.quorum_id}}
9+
10+
<div>
11+
{% for member in prev_quorum.participants %}
12+
13+
<div class="member">
14+
<b>{{ member.replica_id }}</b> <br/>
15+
Step: {{ member.step }} <br/>
16+
Manager: {{ member.address }} <br/>
17+
TCPStore: {{ member.store_address }}
18+
</div>
19+
20+
{% endfor %}
21+
</div>
22+
23+
{% endif %}
24+
25+
<h3>Heartbeats</h3>
26+
27+
<ul>
28+
{% for replica_id in heartbeats.keys() %}
29+
30+
{% let age = heartbeats[replica_id].elapsed().as_secs_f64() %}
31+
<li class="heartbeat">
32+
{{ replica_id }}: seen {{ age }}s ago
33+
</li>
34+
35+
{% endfor %}
36+
</ul>
37+
38+

0 commit comments

Comments
 (0)